Load the data

Clean the environment and load the datasets that we are going to use.

# Start from an empty workspace. rm(list = ls()) only removes the visible
# objects of the global environment; attached packages and options stay as-is.
rm(list = ls())
# WHO dataset: social, economic, health, and political indicators.
who <- read.csv(file = "WHO.csv")
# Economic freedom index dataset: a broad collection of economic indicators.
economic_freedom <- read.csv(file = "index2022_data.csv")

Libraries

# Install mice on first use, then attach it. As the output below shows,
# attaching mice masks stats::filter and base::cbind / base::rbind.
if (!require("mice", character.only = TRUE)) {
  install.packages(pkgs = "mice")
}
## Loading required package: mice
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library("mice")

# Install kernlab if it is not available yet, then attach it.
if (!require("kernlab", character.only = TRUE)) {
  install.packages(pkgs = "kernlab")
}
## Loading required package: kernlab
library("kernlab")

# Install countrycode if it is not available yet, then attach it.
if (!require("countrycode", character.only = TRUE)) {
  install.packages(pkgs = "countrycode")
}
## Loading required package: countrycode
library("countrycode")

# Install rworldmap if it is not available yet, then attach it. The output
# below shows that sp is pulled in as a required package.
if (!require("rworldmap", character.only = TRUE)) {
  install.packages(pkgs = "rworldmap")
}
## Loading required package: rworldmap
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')
library("rworldmap")

# Install factoextra if it is not available yet, then attach it. Loading it
# also attaches ggplot2, whose alpha() masks kernlab::alpha (see output).
if (!require("factoextra", character.only = TRUE)) {
  install.packages(pkgs = "factoextra")
}
## Loading required package: factoextra
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:kernlab':
## 
##     alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("factoextra")

# Install igraph if it is not available yet, then attach it. Per the output
# below, igraph masks stats::decompose, stats::spectrum and base::union.
if (!require("igraph", character.only = TRUE)) {
  install.packages(pkgs = "igraph")
}
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library("igraph")

# factoextra was already installed-if-missing and attached earlier in this
# section, so no second require()/library() call is needed here.

# Install cluster if it is not available yet, then attach it.
if (!require("cluster", character.only = TRUE)) {
  install.packages(pkgs = "cluster")
}
## Loading required package: cluster
library("cluster")

# Install mclust if it is not available yet, then attach it (the run recorded
# below used mclust 5.4.10).
if (!require("mclust", character.only = TRUE)) {
  install.packages(pkgs = "mclust")
}
## Loading required package: mclust
## Package 'mclust' version 5.4.10
## Type 'citation("mclust")' for citing this R package in publications.
library("mclust")

# Install GGally if it is not available yet, then attach it. As reported
# below, GGally registers its own `+.gg` S3 method over the ggplot2 one.
if (!require("GGally", character.only = TRUE)) {
  install.packages(pkgs = "GGally")
}
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library("GGally")

# Install tidyverse if it is not available yet, then attach it. Mind the
# conflicts listed below: e.g. dplyr::filter() now masks mice::filter() and
# stats::filter(), and purrr::map() masks mclust::map().
if (!require("tidyverse", character.only = TRUE)) {
  install.packages(pkgs = "tidyverse")
}
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ✔ purrr   0.3.4      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::alpha()       masks kernlab::alpha()
## ✖ dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## ✖ purrr::compose()       masks igraph::compose()
## ✖ purrr::cross()         masks mclust::cross(), kernlab::cross()
## ✖ tidyr::crossing()      masks igraph::crossing()
## ✖ dplyr::filter()        masks mice::filter(), stats::filter()
## ✖ dplyr::groups()        masks igraph::groups()
## ✖ dplyr::lag()           masks stats::lag()
## ✖ purrr::map()           masks mclust::map()
## ✖ purrr::simplify()      masks igraph::simplify()
library("tidyverse")

# Install VIM if it is not available yet, then attach it. The output below
# shows colorspace and grid being loaded with it, and VIM masking the
# `diabetes` (mclust) and `sleep` (datasets) objects.
if (!require("VIM", character.only = TRUE)) {
  install.packages(pkgs = "VIM")
}
## Loading required package: VIM
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## 
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## 
## The following object is masked from 'package:mclust':
## 
##     diabetes
## 
## The following object is masked from 'package:datasets':
## 
##     sleep
library("VIM")

# Install Quandl if it is not available yet, then attach it. It brings in xts
# and zoo; per the output below, zoo masks base::as.Date and xts masks
# dplyr::first / dplyr::last.
if (!require("Quandl", character.only = TRUE)) {
  install.packages(pkgs = "Quandl")
}
## Loading required package: Quandl
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library("Quandl")

# Install lubridate if it is not available yet, then attach it. Per the
# output below it masks igraph's %--% and union, plus base::date,
# base::intersect, base::setdiff and base::union.
if (!require("lubridate", character.only = TRUE)) {
  install.packages(pkgs = "lubridate")
}
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:igraph':
## 
##     %--%, union
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library("lubridate")

# Install quantmod if it is not available yet, then attach it. TTR is loaded
# alongside it, and quantmod overrides zoo's as.zoo.data.frame S3 method
# (see output).
if (!require("quantmod", character.only = TRUE)) {
  install.packages(pkgs = "quantmod")
}
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library("quantmod")

# Install ggpubr if it is not available yet, then attach it.
if (!require("ggpubr", character.only = TRUE)) {
  install.packages(pkgs = "ggpubr")
}
## Loading required package: ggpubr
library("ggpubr")

# Install outliers if it is not available yet, then attach it.
if (!require("outliers", character.only = TRUE)) {
  install.packages(pkgs = "outliers")
}
## Loading required package: outliers
library("outliers")

# Explicit ggplot2 load. ggplot2 was already attached when factoextra was
# loaded above, which is presumably why this chunk printed no loading message.
if (!require("ggplot2", character.only = TRUE)) {
  install.packages(pkgs = "ggplot2")
}
library("ggplot2")
# Install Amelia (Amelia II: Multiple Imputation, per its banner below) if it
# is not available yet, then attach it. Rcpp is loaded alongside it.
if (!require("Amelia", character.only = TRUE)) {
  install.packages(pkgs = "Amelia")
}
## Loading required package: Amelia
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2022 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library("Amelia")

Let’s have a look at the datasets

# Preview the first rows of each dataset (6 is head()'s default row count).
head(who, n = 6L)
head(economic_freedom, n = 6L)
# Per-column summary of the WHO data; the output below includes NA counts.
summary(object = who)
##    Country            CountryID        Continent    
##  Length:202         Min.   :  1.00   Min.   :1.000  
##  Class :character   1st Qu.: 51.25   1st Qu.:2.000  
##  Mode  :character   Median :101.50   Median :3.000  
##                     Mean   :101.50   Mean   :3.579  
##                     3rd Qu.:151.75   3rd Qu.:5.000  
##                     Max.   :202.00   Max.   :7.000  
##                                                     
##  Adolescent.fertility.rate.... Adult.literacy.rate....
##  Min.   :  0.00                Min.   :23.60          
##  1st Qu.: 19.00                1st Qu.:68.40          
##  Median : 46.00                Median :86.50          
##  Mean   : 59.46                Mean   :78.87          
##  3rd Qu.: 91.00                3rd Qu.:95.30          
##  Max.   :199.00                Max.   :99.80          
##  NA's   :25                    NA's   :71             
##  Gross.national.income.per.capita..PPP.international...
##  Min.   :  260                                         
##  1st Qu.: 2112                                         
##  Median : 6175                                         
##  Mean   :11250                                         
##  3rd Qu.:14502                                         
##  Max.   :60870                                         
##  NA's   :24                                            
##  Net.primary.school.enrolment.ratio.female....
##  Min.   :  6.00                               
##  1st Qu.: 79.00                               
##  Median : 90.00                               
##  Mean   : 84.03                               
##  3rd Qu.: 96.00                               
##  Max.   :100.00                               
##  NA's   :23                                   
##  Net.primary.school.enrolment.ratio.male.... Population..in.thousands..total
##  Min.   : 11.0                               Min.   :      2                
##  1st Qu.: 79.5                               1st Qu.:   1340                
##  Median : 90.0                               Median :   6762                
##  Mean   : 85.7                               Mean   :  34098                
##  3rd Qu.: 96.0                               3rd Qu.:  21732                
##  Max.   :100.0                               Max.   :1328474                
##  NA's   :23                                  NA's   :9                      
##  Population.annual.growth.rate.... Population.in.urban.areas....
##  Min.   :-2.500                    Min.   : 10.00               
##  1st Qu.: 0.500                    1st Qu.: 36.00               
##  Median : 1.300                    Median : 57.00               
##  Mean   : 1.298                    Mean   : 54.91               
##  3rd Qu.: 2.100                    3rd Qu.: 73.00               
##  Max.   : 4.300                    Max.   :100.00               
##  NA's   :9                         NA's   :9                    
##  Population.living.below.the.poverty.line....living.on..lt..US.1.per.day.
##  Min.   : 2.00                                                           
##  1st Qu.: 2.00                                                           
##  Median : 7.45                                                           
##  Mean   :16.02                                                           
##  3rd Qu.:23.05                                                           
##  Max.   :70.80                                                           
##  NA's   :130                                                             
##  Population.median.age..years. Population.proportion.over.60....
##  Min.   :15.00                 Min.   : 2.0                     
##  1st Qu.:20.00                 1st Qu.: 5.0                     
##  Median :25.00                 Median : 8.0                     
##  Mean   :26.74                 Mean   :10.3                     
##  3rd Qu.:35.00                 3rd Qu.:15.0                     
##  Max.   :43.00                 Max.   :27.0                     
##  NA's   :23                    NA's   :9                        
##  Population.proportion.under.15.... Registration.coverage.of.births....
##  Min.   :14.00                      Min.   : 3.00                      
##  1st Qu.:20.00                      1st Qu.:72.00                      
##  Median :31.00                      Median :90.00                      
##  Mean   :30.11                      Mean   :77.26                      
##  3rd Qu.:39.00                      3rd Qu.:90.00                      
##  Max.   :49.00                      Max.   :90.00                      
##  NA's   :9                          NA's   :39                         
##  Total.fertility.rate..per.woman.
##  Min.   :1.200                   
##  1st Qu.:1.800                   
##  Median :2.500                   
##  Mean   :3.005                   
##  3rd Qu.:3.900                   
##  Max.   :7.300                   
##  NA's   :10                      
##  Antenatal.care.coverage...at.least.four.visits....
##  Min.   : 7                                        
##  1st Qu.:41                                        
##  Median :61                                        
##  Mean   :58                                        
##  3rd Qu.:76                                        
##  Max.   :99                                        
##  NA's   :117                                       
##  Antiretroviral.therapy.coverage.among.HIV.infected.pregt.women.for.PMTCT....
##  Min.   : 1.00                                                               
##  1st Qu.: 7.75                                                               
##  Median :14.00                                                               
##  Mean   :22.53                                                               
##  3rd Qu.:27.50                                                               
##  Max.   :95.00                                                               
##  NA's   :166                                                                 
##  Antiretroviral.therapy.coverage.among.people.with.advanced.HIV.infections....
##  Min.   : 1.00                                                                
##  1st Qu.:12.00                                                                
##  Median :22.00                                                                
##  Mean   :28.36                                                                
##  3rd Qu.:38.50                                                                
##  Max.   :95.00                                                                
##  NA's   :99                                                                   
##  Births.attended.by.skilled.health.personnel....
##  Min.   :  6.00                                 
##  1st Qu.: 60.00                                 
##  Median : 95.00                                 
##  Mean   : 79.52                                 
##  3rd Qu.:100.00                                 
##  Max.   :100.00                                 
##  NA's   :21                                     
##  Births.by.caesarean.section....
##  Min.   : 0.00                  
##  1st Qu.: 3.00                  
##  Median : 9.00                  
##  Mean   :10.22                  
##  3rd Qu.:16.00                  
##  Max.   :31.00                  
##  NA's   :125                    
##  Children.aged.6.59.months.who.received.vitamin.A.supplementation....
##  Min.   :11.10                                                       
##  1st Qu.:33.70                                                       
##  Median :49.80                                                       
##  Mean   :51.22                                                       
##  3rd Qu.:68.20                                                       
##  Max.   :84.10                                                       
##  NA's   :177                                                         
##  Children.aged..lt.5.years.sleeping.under.insecticide.treated.nets....
##  Min.   : 0.100                                                       
##  1st Qu.: 1.325                                                       
##  Median : 5.600                                                       
##  Mean   : 9.891                                                       
##  3rd Qu.:13.075                                                       
##  Max.   :49.000                                                       
##  NA's   :156                                                          
##  Children.aged..lt.5.years.who.received.any.antimalarial.treatment.for.fever....
##  Min.   : 0.20                                                                  
##  1st Qu.: 8.10                                                                  
##  Median :30.95                                                                  
##  Mean   :29.63                                                                  
##  3rd Qu.:48.45                                                                  
##  Max.   :62.70                                                                  
##  NA's   :152                                                                    
##  Children.aged..lt.5.years.with.ARI.symptoms.taken.to.facility....
##  Min.   : 6.50                                                    
##  1st Qu.:33.92                                                    
##  Median :44.70                                                    
##  Mean   :44.48                                                    
##  3rd Qu.:55.98                                                    
##  Max.   :76.40                                                    
##  NA's   :166                                                      
##  Children.aged..lt.5.years.with.diarrhoea.receiving.ORT....
##  Min.   :31.90                                             
##  1st Qu.:53.50                                             
##  Median :58.90                                             
##  Mean   :58.59                                             
##  3rd Qu.:66.80                                             
##  Max.   :83.40                                             
##  NA's   :165                                               
##  Contraceptive.prevalence....
##  Min.   : 2.80               
##  1st Qu.:25.70               
##  Median :44.35               
##  Mean   :44.45               
##  3rd Qu.:65.75               
##  Max.   :90.20               
##  NA's   :94                  
##  Neonates.protected.at.birth.against.neonatal.tetanus..PAB.....
##  Min.   : 5.00                                                 
##  1st Qu.:72.00                                                 
##  Median :83.00                                                 
##  Mean   :78.93                                                 
##  3rd Qu.:89.00                                                 
##  Max.   :96.00                                                 
##  NA's   :98                                                    
##  One.year.olds.immunized.with.MCV
##  Min.   :23.00                   
##  1st Qu.:80.00                   
##  Median :92.00                   
##  Mean   :86.81                   
##  3rd Qu.:97.00                   
##  Max.   :99.00                   
##  NA's   :9                       
##  One.year.olds.immunized.with.three.doses.of.diphtheria.tetanus.toxoid.and.pertussis..DTP3.....
##  Min.   :20.00                                                                                 
##  1st Qu.:83.00                                                                                 
##  Median :94.00                                                                                 
##  Mean   :87.61                                                                                 
##  3rd Qu.:97.00                                                                                 
##  Max.   :99.00                                                                                 
##  NA's   :9                                                                                     
##  One.year.olds.immunized.with.three.doses.of.Hepatitis.B..HepB3.....
##  Min.   : 4.00                                                      
##  1st Qu.:82.50                                                      
##  Median :92.00                                                      
##  Mean   :86.37                                                      
##  3rd Qu.:97.00                                                      
##  Max.   :99.00                                                      
##  NA's   :31                                                         
##  One.year.olds.immunized.with.three.doses.of.Hib..Hib3..vaccine....
##  Min.   :11.00                                                     
##  1st Qu.:84.25                                                     
##  Median :94.00                                                     
##  Mean   :88.21                                                     
##  3rd Qu.:97.00                                                     
##  Max.   :99.00                                                     
##  NA's   :88                                                        
##  Tuberculosis.detection.rate.under.DOTS....
##  Min.   :  0.00                            
##  1st Qu.: 42.00                            
##  Median : 62.00                            
##  Mean   : 62.57                            
##  3rd Qu.: 80.00                            
##  Max.   :284.00                            
##  NA's   :15                                
##  Tuberculosis.treatment.success.under.DOTS....
##  Min.   :  0.00                               
##  1st Qu.: 71.00                               
##  Median : 80.00                               
##  Mean   : 77.59                               
##  3rd Qu.: 87.00                               
##  Max.   :100.00                               
##  NA's   :25                                   
##  Women.who.have.had.mammography.... Women.who.have.had.PAP.smear....
##  Min.   : 0                         Min.   : 0.00                   
##  1st Qu.: 2                         1st Qu.: 6.75                   
##  Median :16                         Median :40.00                   
##  Mean   :29                         Mean   :38.04                   
##  3rd Qu.:54                         3rd Qu.:67.25                   
##  Max.   :98                         Max.   :83.00                   
##  NA's   :129                        NA's   :130                     
##  Community.and.traditional.health.workers.density..per.10.000.population.
##  Min.   : 1.000                                                          
##  1st Qu.: 2.000                                                          
##  Median : 4.000                                                          
##  Mean   : 9.194                                                          
##  3rd Qu.:14.000                                                          
##  Max.   :43.000                                                          
##  NA's   :171                                                             
##  Dentistry.personnel.density..per.10.000.population.
##  Min.   : 1.000                                     
##  1st Qu.: 2.000                                     
##  Median : 5.000                                     
##  Mean   : 5.211                                     
##  3rd Qu.: 8.000                                     
##  Max.   :16.000                                     
##  NA's   :88                                         
##  Environment.and.public.health.workers.density..per.10.000.population.
##  Min.   : 1.00                                                        
##  1st Qu.: 1.00                                                        
##  Median : 2.00                                                        
##  Mean   : 2.81                                                        
##  3rd Qu.: 3.00                                                        
##  Max.   :10.00                                                        
##  NA's   :181                                                          
##  External.resources.for.health.as.percentage.of.total.expenditure.on.health
##  Min.   : 0.000                                                            
##  1st Qu.: 0.000                                                            
##  Median : 1.300                                                            
##  Mean   : 9.717                                                            
##  3rd Qu.:13.800                                                            
##  Max.   :73.100                                                            
##  NA's   :11                                                                
##  General.government.expenditure.on.health.as.percentage.of.total.expenditure.on.health
##  Min.   :12.30                                                                        
##  1st Qu.:44.80                                                                        
##  Median :62.80                                                                        
##  Mean   :60.11                                                                        
##  3rd Qu.:76.60                                                                        
##  Max.   :98.60                                                                        
##  NA's   :9                                                                            
##  General.government.expenditure.on.health.as.percentage.of.total.government.expenditure
##  Min.   : 1.30                                                                         
##  1st Qu.: 7.40                                                                         
##  Median :10.70                                                                         
##  Mean   :11.06                                                                         
##  3rd Qu.:14.00                                                                         
##  Max.   :29.80                                                                         
##  NA's   :9                                                                             
##  Hospital.beds..per.10.000.population.
##  Min.   :  1.00                       
##  1st Qu.: 12.00                       
##  Median : 26.00                       
##  Mean   : 32.17                       
##  3rd Qu.: 48.25                       
##  Max.   :141.00                       
##  NA's   :22                           
##  Laboratory.health.workers.density..per.10.000.population.
##  Min.   : 1.000                                           
##  1st Qu.: 2.000                                           
##  Median : 3.000                                           
##  Mean   : 4.568                                           
##  3rd Qu.: 5.000                                           
##  Max.   :23.000                                           
##  NA's   :165                                              
##  Number.of.community.and.traditional.health.workers
##  Min.   :     0                                    
##  1st Qu.:   133                                    
##  Median :   968                                    
##  Mean   : 10636                                    
##  3rd Qu.:  5528                                    
##  Max.   :115761                                    
##  NA's   :151                                       
##  Number.of.dentistry.personnel Number.of.environment.and.public.health.workers
##  Min.   :     1.0              Min.   :     9                                 
##  1st Qu.:    58.5              1st Qu.:   101                                 
##  Median :   850.0              Median :   238                                 
##  Mean   :  9901.0              Mean   :  5142                                 
##  3rd Qu.:  4484.2              3rd Qu.:  1541                                 
##  Max.   :463663.0              Max.   :167080                                 
##  NA's   :12                    NA's   :129                                    
##  Number.of.laboratory.health.workers Number.of.nursing.and.midwifery.personnel
##  Min.   :    17                      Min.   :     22                          
##  1st Qu.:   195                      1st Qu.:   2499                          
##  Median :   690                      Median :  12840                          
##  Mean   : 15658                      Mean   :  93414                          
##  3rd Qu.:  3816                      3rd Qu.:  46930                          
##  Max.   :651035                      Max.   :2669603                          
##  NA's   :123                         NA's   :9                                
##  Number.of.other.health.service.providers Number.of.pharmaceutical.personnel
##  Min.   :      4                          Min.   :     1                    
##  1st Qu.:    566                          1st Qu.:   108                    
##  Median :   1960                          Median :  1002                    
##  Mean   : 100960                          Mean   : 14090                    
##  3rd Qu.:  19142                          3rd Qu.:  5046                    
##  Max.   :4138567                          Max.   :559408                    
##  NA's   :107                              NA's   :36                        
##  Number.of.physicians
##  Min.   :      4     
##  1st Qu.:    345     
##  Median :   5187     
##  Mean   :  43591     
##  3rd Qu.:  28812     
##  Max.   :1862630     
##  NA's   :9           
##  Nursing.and.midwifery.personnel.density..per.10.000.population.
##  Min.   :  1.00                                                 
##  1st Qu.:  9.00                                                 
##  Median : 29.00                                                 
##  Mean   : 43.05                                                 
##  3rd Qu.: 57.00                                                 
##  Max.   :955.00                                                 
##  NA's   :9                                                      
##  Other.health.service.providers.density..per.10.000.population.
##  Min.   :  1.00                                                
##  1st Qu.:  3.00                                                
##  Median :  7.50                                                
##  Mean   : 17.21                                                
##  3rd Qu.: 18.25                                                
##  Max.   :145.00                                                
##  NA's   :132                                                   
##  Out.of.pocket.expenditure.as.percentage.of.private.expenditure.on.health
##  Min.   : 14.40                                                          
##  1st Qu.: 72.50                                                          
##  Median : 86.70                                                          
##  Mean   : 80.44                                                          
##  3rd Qu.: 96.00                                                          
##  Max.   :100.00                                                          
##  NA's   :9                                                               
##  Per.capita.government.expenditure.on.health..PPP.int....
##  Min.   :   4                                            
##  1st Qu.:  65                                            
##  Median : 219                                            
##  Mean   : 587                                            
##  3rd Qu.: 565                                            
##  Max.   :5309                                            
##  NA's   :9                                               
##  Per.capita.government.expenditure.on.health.at.average.exchange.rate..US..
##  Min.   :   0.0                                                            
##  1st Qu.:  19.0                                                            
##  Median : 108.0                                                            
##  Mean   : 558.6                                                            
##  3rd Qu.: 381.0                                                            
##  Max.   :5991.0                                                            
##  NA's   :9                                                                 
##  Per.capita.total.expenditure.on.health..PPP.int....
##  Min.   :  15.0                                     
##  1st Qu.: 116.0                                     
##  Median : 353.0                                     
##  Mean   : 847.3                                     
##  3rd Qu.: 869.0                                     
##  Max.   :7154.0                                     
##  NA's   :9                                          
##  Per.capita.total.expenditure.on.health.at.average.exchange.rate..US..
##  Min.   :   0.0                                                       
##  1st Qu.:  42.0                                                       
##  Median : 217.0                                                       
##  Mean   : 774.8                                                       
##  3rd Qu.: 568.0                                                       
##  Max.   :6714.0                                                       
##  NA's   :9                                                            
##  Pharmaceutical.personnel.density..per.10.000.population.
##  Min.   : 1.000                                          
##  1st Qu.: 2.000                                          
##  Median : 5.000                                          
##  Mean   : 5.728                                          
##  3rd Qu.: 8.000                                          
##  Max.   :20.000                                          
##  NA's   :99                                              
##  Physicians.density..per.10.000.population.
##  Min.   :  1.00                            
##  1st Qu.:  5.00                            
##  Median : 13.00                            
##  Mean   : 19.77                            
##  3rd Qu.: 27.00                            
##  Max.   :474.00                            
##  NA's   :32                                
##  Private.expenditure.on.health.as.percentage.of.total.expenditure.on.health
##  Min.   : 1.40                                                             
##  1st Qu.:23.40                                                             
##  Median :37.20                                                             
##  Mean   :39.89                                                             
##  3rd Qu.:55.20                                                             
##  Max.   :87.70                                                             
##  NA's   :9                                                                 
##  Private.prepaid.plans.as.percentage.of.private.expenditure.on.health
##  Min.   : 0.00                                                       
##  1st Qu.: 0.10                                                       
##  Median : 5.20                                                       
##  Mean   :10.93                                                       
##  3rd Qu.:14.20                                                       
##  Max.   :79.50                                                       
##  NA's   :32                                                          
##  Ratio.of.health.management.and.support.workers.to.health.service.providers
##  Min.   : 0.000                                                            
##  1st Qu.: 0.100                                                            
##  Median : 0.200                                                            
##  Mean   : 1.285                                                            
##  3rd Qu.: 0.400                                                            
##  Max.   :69.700                                                            
##  NA's   :132                                                               
##  Ratio.of.nurses.and.midwives.to.physicians
##  Min.   : 0.100                            
##  1st Qu.: 2.000                            
##  Median : 3.000                            
##  Mean   : 4.846                            
##  3rd Qu.: 5.300                            
##  Max.   :39.400                            
##  NA's   :9                                 
##  Social.security.expenditure.on.health.as.percentage.of.general.government.expenditure.on.health
##  Min.   : 0.00                                                                                  
##  1st Qu.: 0.00                                                                                  
##  Median : 2.95                                                                                  
##  Mean   :24.46                                                                                  
##  3rd Qu.:45.98                                                                                  
##  Max.   :98.40                                                                                  
##  NA's   :26                                                                                     
##  Total.expenditure.on.health.as.percentage.of.gross.domestic.product
##  Min.   : 1.500                                                     
##  1st Qu.: 4.500                                                     
##  Median : 6.000                                                     
##  Mean   : 6.309                                                     
##  3rd Qu.: 7.600                                                     
##  Max.   :16.400                                                     
##  NA's   :9                                                          
##  Births.attended.by.skilled.health.personnel.....highest.educational.level.of.mother
##  Min.   :28.00                                                                      
##  1st Qu.:79.08                                                                      
##  Median :88.25                                                                      
##  Mean   :84.50                                                                      
##  3rd Qu.:94.55                                                                      
##  Max.   :99.80                                                                      
##  NA's   :142                                                                        
##  Births.attended.by.skilled.health.personnel.....highest.wealth.quintile
##  Min.   : 26.60                                                         
##  1st Qu.: 84.17                                                         
##  Median : 91.15                                                         
##  Mean   : 86.16                                                         
##  3rd Qu.: 98.10                                                         
##  Max.   :100.00                                                         
##  NA's   :146                                                            
##  Births.attended.by.skilled.health.personnel.....lowest.educational.level.of.mother
##  Min.   : 2.30                                                                     
##  1st Qu.:21.55                                                                     
##  Median :33.60                                                                     
##  Mean   :40.61                                                                     
##  3rd Qu.:54.45                                                                     
##  Max.   :99.70                                                                     
##  NA's   :142                                                                       
##  Births.attended.by.skilled.health.personnel.....lowest.wealth.quintile
##  Min.   : 0.70                                                         
##  1st Qu.:14.65                                                         
##  Median :28.85                                                         
##  Mean   :37.06                                                         
##  3rd Qu.:51.73                                                         
##  Max.   :99.20                                                         
##  NA's   :146                                                           
##  Births.attended.by.skilled.health.personnel.....rural
##  Min.   : 2.60                                        
##  1st Qu.:27.48                                        
##  Median :39.70                                        
##  Mean   :47.37                                        
##  3rd Qu.:68.53                                        
##  Max.   :99.50                                        
##  NA's   :142                                          
##  Births.attended.by.skilled.health.personnel.....urban
##  Min.   :29.60                                        
##  1st Qu.:73.12                                        
##  Median :83.55                                        
##  Mean   :79.91                                        
##  3rd Qu.:92.45                                        
##  Max.   :99.60                                        
##  NA's   :142                                          
##  Births.attended.by.skilled.health.personnel.difference.highest.lowest.educational.level.of.mother
##  Min.   :-1.40                                                                                    
##  1st Qu.:34.02                                                                                    
##  Median :49.25                                                                                    
##  Mean   :43.89                                                                                    
##  3rd Qu.:57.77                                                                                    
##  Max.   :75.90                                                                                    
##  NA's   :142                                                                                      
##  Births.attended.by.skilled.health.personnel.difference.highest.lowest.wealth.quintile
##  Min.   :-0.70                                                                        
##  1st Qu.:37.55                                                                        
##  Median :52.50                                                                        
##  Mean   :49.10                                                                        
##  3rd Qu.:66.25                                                                        
##  Max.   :83.10                                                                        
##  NA's   :146                                                                          
##  Births.attended.by.skilled.health.personnel.difference.urban.rural
##  Min.   :-1.10                                                     
##  1st Qu.:22.23                                                     
##  Median :34.95                                                     
##  Mean   :32.53                                                     
##  3rd Qu.:43.48                                                     
##  Max.   :62.40                                                     
##  NA's   :142                                                       
##  Births.attended.by.skilled.health.personnel.ratio.highest.lowest.educational.level.of.mother
##  Min.   : 1.000                                                                              
##  1st Qu.: 1.750                                                                              
##  Median : 2.600                                                                              
##  Mean   : 3.342                                                                              
##  3rd Qu.: 3.825                                                                              
##  Max.   :25.100                                                                              
##  NA's   :142                                                                                 
##  Births.attended.by.skilled.health.personnel.ratio.highest.lowest.wealth.quintile
##  Min.   : 1.000                                                                  
##  1st Qu.: 1.800                                                                  
##  Median : 3.150                                                                  
##  Mean   : 4.912                                                                  
##  3rd Qu.: 5.775                                                                  
##  Max.   :38.000                                                                  
##  NA's   :146                                                                     
##  Births.attended.by.skilled.health.personnel.ratio.urban.rural
##  Min.   : 1.000                                               
##  1st Qu.: 1.300                                               
##  Median : 1.850                                               
##  Mean   : 2.513                                               
##  3rd Qu.: 2.825                                               
##  Max.   :17.200                                               
##  NA's   :142                                                  
##  Measles.immunization.coverage.among.one.year.olds.....highest.educational.level.of.mother
##  Min.   :53.70                                                                            
##  1st Qu.:77.25                                                                            
##  Median :85.25                                                                            
##  Mean   :83.43                                                                            
##  3rd Qu.:90.90                                                                            
##  Max.   :99.10                                                                            
##  NA's   :142                                                                              
##  Measles.immunization.coverage.among.one.year.olds.....highest.wealth.quintile
##  Min.   :38.10                                                                
##  1st Qu.:73.92                                                                
##  Median :84.55                                                                
##  Mean   :81.62                                                                
##  3rd Qu.:90.05                                                                
##  Max.   :97.80                                                                
##  NA's   :146                                                                  
##  Measles.immunization.coverage.among.one.year.olds.....lowest.educational.level.of.mother
##  Min.   :15.60                                                                           
##  1st Qu.:44.58                                                                           
##  Median :62.85                                                                           
##  Mean   :59.24                                                                           
##  3rd Qu.:73.20                                                                           
##  Max.   :96.00                                                                           
##  NA's   :142                                                                             
##  Measles.immunization.coverage.among.one.year.olds.....lowest.wealth.quintile
##  Min.   : 8.20                                                               
##  1st Qu.:46.50                                                               
##  Median :64.80                                                               
##  Mean   :60.29                                                               
##  3rd Qu.:76.62                                                               
##  Max.   :95.10                                                               
##  NA's   :146                                                                 
##  Measles.immunization.coverage.among.one.year.olds.....rural
##  Min.   :19.20                                              
##  1st Qu.:55.17                                              
##  Median :70.95                                              
##  Mean   :66.83                                              
##  3rd Qu.:80.97                                              
##  Max.   :96.50                                              
##  NA's   :142                                                
##  Measles.immunization.coverage.among.one.year.olds.....urban
##  Min.   :37.50                                              
##  1st Qu.:71.40                                              
##  Median :80.25                                              
##  Mean   :77.86                                              
##  3rd Qu.:86.03                                              
##  Max.   :96.80                                              
##  NA's   :142                                                
##  Measles.immunization.coverage.among.one.year.olds.difference.highest.lowest.educational.level.of.mother
##  Min.   :-7.60                                                                                          
##  1st Qu.:13.72                                                                                          
##  Median :23.80                                                                                          
##  Mean   :24.19                                                                                          
##  3rd Qu.:33.98                                                                                          
##  Max.   :50.90                                                                                          
##  NA's   :142                                                                                            
##  Measles.immunization.coverage.among.one.year.olds.difference.highest.lowest.wealth.quintile
##  Min.   :-11.20                                                                             
##  1st Qu.: 11.00                                                                             
##  Median : 20.20                                                                             
##  Mean   : 21.33                                                                             
##  3rd Qu.: 33.25                                                                             
##  Max.   : 57.20                                                                             
##  NA's   :146                                                                                
##  Measles.immunization.coverage.among.one.year.olds.difference.urban.rural
##  Min.   :-13.400                                                         
##  1st Qu.:  4.225                                                         
##  Median :  9.250                                                         
##  Mean   : 11.030                                                         
##  3rd Qu.: 18.075                                                         
##  Max.   : 38.100                                                         
##  NA's   :142                                                             
##  Measles.immunization.coverage.among.one.year.olds.ratio.highest.lowest.educational.level.of.mother
##  Min.   :0.90                                                                                      
##  1st Qu.:1.20                                                                                      
##  Median :1.40                                                                                      
##  Mean   :1.55                                                                                      
##  3rd Qu.:1.80                                                                                      
##  Max.   :4.30                                                                                      
##  NA's   :142                                                                                       
##  Measles.immunization.coverage.among.one.year.olds.ratio.highest.lowest.wealth.quintile
##  Min.   :0.800                                                                         
##  1st Qu.:1.100                                                                         
##  Median :1.300                                                                         
##  Mean   :1.589                                                                         
##  3rd Qu.:1.700                                                                         
##  Max.   :4.600                                                                         
##  NA's   :146                                                                           
##  Measles.immunization.coverage.among.one.year.olds.ratio.urban.rural
##  Min.   :0.800                                                      
##  1st Qu.:1.075                                                      
##  Median :1.100                                                      
##  Mean   :1.227                                                      
##  3rd Qu.:1.300                                                      
##  Max.   :2.100                                                      
##  NA's   :142                                                        
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..difference.lowest.highest.educational.level.of.mother
##  Min.   :  1.40                                                                                                                             
##  1st Qu.: 38.20                                                                                                                             
##  Median : 63.60                                                                                                                             
##  Mean   : 65.17                                                                                                                             
##  3rd Qu.: 85.00                                                                                                                             
##  Max.   :162.20                                                                                                                             
##  NA's   :141                                                                                                                                
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..difference.lowest.highest.wealth.quintile
##  Min.   :-11.00                                                                                                                 
##  1st Qu.: 37.55                                                                                                                 
##  Median : 50.35                                                                                                                 
##  Mean   : 58.38                                                                                                                 
##  3rd Qu.: 73.50                                                                                                                 
##  Max.   :178.00                                                                                                                 
##  NA's   :146                                                                                                                    
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..difference.rural.urban
##  Min.   :-14.5                                                                                               
##  1st Qu.: 19.4                                                                                               
##  Median : 30.3                                                                                               
##  Mean   : 33.1                                                                                               
##  3rd Qu.: 42.3                                                                                               
##  Max.   : 91.4                                                                                               
##  NA's   :141                                                                                                 
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..highest.educational.level.of.mother
##  Min.   : 19.7                                                                                                            
##  1st Qu.: 35.1                                                                                                            
##  Median : 64.7                                                                                                            
##  Mean   : 62.6                                                                                                            
##  3rd Qu.: 85.5                                                                                                            
##  Max.   :143.0                                                                                                            
##  NA's   :141                                                                                                              
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..highest.wealth.quintile
##  Min.   : 15.80                                                                                               
##  1st Qu.: 30.05                                                                                               
##  Median : 64.50                                                                                               
##  Mean   : 66.38                                                                                               
##  3rd Qu.: 92.10                                                                                               
##  Max.   :187.00                                                                                               
##  NA's   :146                                                                                                  
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..lowest.educational.level.of.mother
##  Min.   : 31.0                                                                                                           
##  1st Qu.: 83.6                                                                                                           
##  Median :122.6                                                                                                           
##  Mean   :127.8                                                                                                           
##  3rd Qu.:164.2                                                                                                           
##  Max.   :269.4                                                                                                           
##  NA's   :141                                                                                                             
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..lowest.wealth.quintile
##  Min.   : 29.0                                                                                               
##  1st Qu.: 77.9                                                                                               
##  Median :117.5                                                                                               
##  Mean   :124.8                                                                                               
##  3rd Qu.:173.0                                                                                               
##  Max.   :257.0                                                                                               
##  NA's   :146                                                                                                 
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..ratio.lowest.highest.educational.level.of.mother
##  Min.   :1.000                                                                                                                         
##  1st Qu.:1.800                                                                                                                         
##  Median :2.100                                                                                                                         
##  Mean   :2.156                                                                                                                         
##  3rd Qu.:2.500                                                                                                                         
##  Max.   :3.700                                                                                                                         
##  NA's   :141                                                                                                                           
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..ratio.lowest.highest.wealth.quintile
##  Min.   :0.900                                                                                                             
##  1st Qu.:1.600                                                                                                             
##  Median :2.000                                                                                                             
##  Mean   :2.211                                                                                                             
##  3rd Qu.:2.925                                                                                                             
##  Max.   :5.300                                                                                                             
##  NA's   :146                                                                                                               
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..ratio.rural.urban
##  Min.   :0.900                                                                                          
##  1st Qu.:1.300                                                                                          
##  Median :1.400                                                                                          
##  Mean   :1.454                                                                                          
##  3rd Qu.:1.600                                                                                          
##  Max.   :2.200                                                                                          
##  NA's   :141                                                                                            
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..rural
##  Min.   : 29.6                                                                              
##  1st Qu.: 69.4                                                                              
##  Median :111.0                                                                              
##  Mean   :114.9                                                                              
##  3rd Qu.:157.4                                                                              
##  Max.   :253.2                                                                              
##  NA's   :141                                                                                
##  Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..urban
##  Min.   : 16.20                                                                             
##  1st Qu.: 44.80                                                                             
##  Median : 80.70                                                                             
##  Mean   : 81.84                                                                             
##  3rd Qu.:115.40                                                                             
##  Max.   :184.60                                                                             
##  NA's   :141                                                                                
##  Adult.mortality.rate..probability.of.dying.between.15.to.60.years.per.1000.population..both.sexes
##  Min.   : 48.0                                                                                    
##  1st Qu.:121.0                                                                                    
##  Median :186.0                                                                                    
##  Mean   :222.6                                                                                    
##  3rd Qu.:282.0                                                                                    
##  Max.   :751.0                                                                                    
##  NA's   :9                                                                                        
##  Adult.mortality.rate..probability.of.dying.between.15.to.60.years.per.1000.population..female
##  Min.   : 37.0                                                                                
##  1st Qu.: 84.0                                                                                
##  Median :138.0                                                                                
##  Mean   :184.3                                                                                
##  3rd Qu.:239.0                                                                                
##  Max.   :755.0                                                                                
##  NA's   :9                                                                                    
##  Adult.mortality.rate..probability.of.dying.between.15.to.60.years.per.1000.population..male
##  Min.   : 59                                                                                
##  1st Qu.:151                                                                                
##  Median :233                                                                                
##  Mean   :260                                                                                
##  3rd Qu.:329                                                                                
##  Max.   :798                                                                                
##  NA's   :9                                                                                  
##  Age.standardized.mortality.rate.for.cancer..per.100.000.population.
##  Min.   : 52.0                                                      
##  1st Qu.:112.5                                                      
##  Median :133.0                                                      
##  Mean   :131.1                                                      
##  3rd Qu.:149.0                                                      
##  Max.   :306.0                                                      
##  NA's   :11                                                         
##  Age.standardized.mortality.rate.for.cardiovascular.diseases..per.100.000.population.
##  Min.   :106.0                                                                       
##  1st Qu.:257.5                                                                       
##  Median :393.0                                                                       
##  Mean   :371.8                                                                       
##  3rd Qu.:444.0                                                                       
##  Max.   :844.0                                                                       
##  NA's   :11                                                                          
##  Age.standardized.mortality.rate.for.injuries..per.100.000.population.
##  Min.   : 12.00                                                       
##  1st Qu.: 41.00                                                       
##  Median : 69.00                                                       
##  Mean   : 81.16                                                       
##  3rd Qu.:104.50                                                       
##  Max.   :301.00                                                       
##  NA's   :11                                                           
##  Age.standardized.mortality.rate.for.non.communicable.diseases..per.100.000.population.
##  Min.   : 287.0                                                                        
##  1st Qu.: 565.0                                                                        
##  Median : 728.0                                                                        
##  Mean   : 702.0                                                                        
##  3rd Qu.: 831.5                                                                        
##  Max.   :1269.0                                                                        
##  NA's   :11                                                                            
##  Deaths.among.children.under.five.years.of.age.due.to.diarrhoeal.diseases....
##  Min.   : 0.000                                                              
##  1st Qu.: 1.100                                                              
##  Median :10.600                                                              
##  Mean   : 9.309                                                              
##  3rd Qu.:15.600                                                              
##  Max.   :37.800                                                              
##  NA's   :15                                                                  
##  Deaths.among.children.under.five.years.of.age.due.to.HIV.AIDS....
##  Min.   : 0.000                                                   
##  1st Qu.: 0.000                                                   
##  Median : 0.300                                                   
##  Mean   : 3.323                                                   
##  3rd Qu.: 2.600                                                   
##  Max.   :57.100                                                   
##  NA's   :15                                                       
##  Deaths.among.children.under.five.years.of.age.due.to.injuries....
##  Min.   : 0.000                                                   
##  1st Qu.: 2.350                                                   
##  Median : 3.800                                                   
##  Mean   : 5.084                                                   
##  3rd Qu.: 6.700                                                   
##  Max.   :19.400                                                   
##  NA's   :15                                                       
##  Deaths.among.children.under.five.years.of.age.due.to.malaria....
##  Min.   : 0.00                                                   
##  1st Qu.: 0.00                                                   
##  Median : 0.20                                                   
##  Mean   : 4.07                                                   
##  3rd Qu.: 0.90                                                   
##  Max.   :33.00                                                   
##  NA's   :15                                                      
##  Deaths.among.children.under.five.years.of.age.due.to.measles....
##  Min.   :0.000                                                   
##  1st Qu.:0.000                                                   
##  Median :0.100                                                   
##  Mean   :1.334                                                   
##  3rd Qu.:2.250                                                   
##  Max.   :8.100                                                   
##  NA's   :15                                                      
##  Deaths.among.children.under.five.years.of.age.due.to.neonatal.causes....
##  Min.   : 2.80                                                           
##  1st Qu.:31.75                                                           
##  Median :43.10                                                           
##  Mean   :43.20                                                           
##  3rd Qu.:52.80                                                           
##  Max.   :99.90                                                           
##  NA's   :15                                                              
##  Deaths.among.children.under.five.years.of.age.due.to.other.causes....
##  Min.   : 0.00                                                        
##  1st Qu.:12.00                                                        
##  Median :23.90                                                        
##  Mean   :22.44                                                        
##  3rd Qu.:32.15                                                        
##  Max.   :74.90                                                        
##  NA's   :15                                                           
##  Deaths.among.children.under.five.years.of.age.due.to.pneumonia....
##  Min.   : 0.00                                                     
##  1st Qu.: 3.75                                                     
##  Median :11.50                                                     
##  Mean   :11.24                                                     
##  3rd Qu.:18.45                                                     
##  Max.   :30.30                                                     
##  NA's   :15                                                        
##  Deaths.due.to.HIV.AIDS..per.100.000.population.per.year.
##  Min.   :   1.0                                          
##  1st Qu.:  10.0                                          
##  Median :  33.0                                          
##  Mean   : 131.1                                          
##  3rd Qu.: 129.0                                          
##  Max.   :1550.0                                          
##  NA's   :67                                              
##  Deaths.due.to.tuberculosis.among.HIV.negative.people..per.100.000.population.
##  Min.   :  0.00                                                               
##  1st Qu.:  2.00                                                               
##  Median :  8.00                                                               
##  Mean   : 20.38                                                               
##  3rd Qu.: 32.00                                                               
##  Max.   :115.00                                                               
##  NA's   :9                                                                    
##  Deaths.due.to.tuberculosis.among.HIV.positive.people..per.100.000.population.
##  Min.   :  0.000                                                              
##  1st Qu.:  0.000                                                              
##  Median :  0.000                                                              
##  Mean   :  8.687                                                              
##  3rd Qu.:  5.000                                                              
##  Max.   :184.000                                                              
##  NA's   :55                                                                   
##  Healthy.life.expectancy..HALE..at.birth..years..both.sexes
##  Min.   :29.00                                             
##  1st Qu.:50.00                                             
##  Median :60.00                                             
##  Mean   :57.37                                             
##  3rd Qu.:65.00                                             
##  Max.   :75.00                                             
##  NA's   :11                                                
##  Healthy.life.expectancy..HALE..at.birth..years..female
##  Min.   :30.00                                         
##  1st Qu.:51.00                                         
##  Median :62.00                                         
##  Mean   :58.92                                         
##  3rd Qu.:67.50                                         
##  Max.   :78.00                                         
##  NA's   :11                                            
##  Healthy.life.expectancy..HALE..at.birth..years..male
##  Min.   :27.00                                       
##  1st Qu.:49.00                                       
##  Median :58.00                                       
##  Mean   :55.84                                       
##  3rd Qu.:63.00                                       
##  Max.   :72.00                                       
##  NA's   :11                                          
##  Incidence.of.tuberculosis..per.100.000.population.per.year.
##  Min.   :   2.0                                             
##  1st Qu.:  19.0                                             
##  Median :  62.0                                             
##  Mean   : 142.9                                             
##  3rd Qu.: 204.0                                             
##  Max.   :1155.0                                             
##  NA's   :9                                                  
##  Infant.mortality.rate..per.1.000.live.births..both.sexes
##  Min.   :  2.00                                          
##  1st Qu.:  9.00                                          
##  Median : 23.00                                          
##  Mean   : 38.04                                          
##  3rd Qu.: 59.00                                          
##  Max.   :165.00                                          
##  NA's   :9                                               
##  Infant.mortality.rate..per.1.000.live.births..female
##  Min.   :  2.0                                       
##  1st Qu.:  9.0                                       
##  Median : 20.0                                       
##  Mean   : 34.9                                       
##  3rd Qu.: 54.0                                       
##  Max.   :154.0                                       
##  NA's   :9                                           
##  Infant.mortality.rate..per.1.000.live.births..male
##  Min.   :  3.00                                    
##  1st Qu.:  9.00                                    
##  Median : 24.00                                    
##  Mean   : 41.01                                    
##  3rd Qu.: 63.00                                    
##  Max.   :176.00                                    
##  NA's   :9                                         
##  Life.expectancy.at.birth..years..both.sexes
##  Min.   :40.00                              
##  1st Qu.:61.00                              
##  Median :70.00                              
##  Mean   :67.27                              
##  3rd Qu.:75.00                              
##  Max.   :83.00                              
##  NA's   :9                                  
##  Life.expectancy.at.birth..years..female Life.expectancy.at.birth..years..male
##  Min.   :42.00                           Min.   :39.00                        
##  1st Qu.:63.00                           1st Qu.:59.00                        
##  Median :73.00                           Median :67.00                        
##  Mean   :69.72                           Mean   :64.92                        
##  3rd Qu.:78.00                           3rd Qu.:72.00                        
##  Max.   :86.00                           Max.   :80.00                        
##  NA's   :9                               NA's   :9                            
##  Maternal.mortality.ratio..per.100.000.live.births.
##  Min.   :   1.0                                    
##  1st Qu.:  15.0                                    
##  Median : 130.0                                    
##  Mean   : 322.4                                    
##  3rd Qu.: 510.0                                    
##  Max.   :2100.0                                    
##  NA's   :33                                        
##  Neonatal.mortality.rate..per.1.000.live.births.
##  Min.   : 1.00                                  
##  1st Qu.: 5.00                                  
##  Median :14.00                                  
##  Mean   :19.82                                  
##  3rd Qu.:32.00                                  
##  Max.   :66.00                                  
##  NA's   :11                                     
##  Number.of.confirmed.poliomyelitis.cases
##  Min.   :  0.00                         
##  1st Qu.:  0.00                         
##  Median :  0.00                         
##  Mean   : 25.74                         
##  3rd Qu.:  0.75                         
##  Max.   :756.00                         
##  NA's   :156                            
##  Prevalence.of.HIV.among.adults.aged..gt..15.years..per.100.000.population.
##  Min.   :   52.0                                                           
##  1st Qu.:  115.5                                                           
##  Median :  480.0                                                           
##  Mean   : 2291.7                                                           
##  3rd Qu.: 1819.5                                                           
##  Max.   :34457.0                                                           
##  NA's   :59                                                                
##  Prevalence.of.tuberculosis..per.100.000.population.
##  Min.   :   2.0                                     
##  1st Qu.:  24.0                                     
##  Median :  80.0                                     
##  Mean   : 196.5                                     
##  3rd Qu.: 299.0                                     
##  Max.   :1300.0                                     
##  NA's   :9                                          
##  Under.5.mortality.rate..probability.of.dying.by.age.5.per.1000.live.births..both.sexes
##  Min.   :  3.00                                                                        
##  1st Qu.: 10.00                                                                        
##  Median : 26.00                                                                        
##  Mean   : 54.79                                                                        
##  3rd Qu.: 76.00                                                                        
##  Max.   :269.00                                                                        
##  NA's   :9                                                                             
##  Under.5.mortality.rate..probability.of.dying.by.age.5.per.1000.live.births..female
##  Min.   :  2.00                                                                    
##  1st Qu.: 10.00                                                                    
##  Median : 25.00                                                                    
##  Mean   : 51.52                                                                    
##  3rd Qu.: 75.00                                                                    
##  Max.   :254.00                                                                    
##  NA's   :9                                                                         
##  Under.5.mortality.rate..probability.of.dying.by.age.5.per.1000.live.births..male
##  Min.   :  3.00                                                                  
##  1st Qu.: 10.00                                                                  
##  Median : 28.00                                                                  
##  Mean   : 57.86                                                                  
##  3rd Qu.: 80.00                                                                  
##  Max.   :286.00                                                                  
##  NA's   :9                                                                       
##  Years.of.life.lost.to.communicable.diseases....
##  Min.   : 3.00                                  
##  1st Qu.:11.00                                  
##  Median :31.00                                  
##  Mean   :39.34                                  
##  3rd Qu.:68.50                                  
##  Max.   :93.00                                  
##  NA's   :11                                     
##  Years.of.life.lost.to.injuries....
##  Min.   : 2.00                     
##  1st Qu.: 9.00                     
##  Median :12.00                     
##  Mean   :13.24                     
##  3rd Qu.:17.00                     
##  Max.   :40.00                     
##  NA's   :11                        
##  Years.of.life.lost.to.non.communicable.diseases....
##  Min.   : 4.00                                      
##  1st Qu.:21.50                                      
##  Median :52.00                                      
##  Mean   :47.46                                      
##  3rd Qu.:70.50                                      
##  Max.   :87.00                                      
##  NA's   :11                                         
##  Children.under.five.years.of.age.overweight.for.age....
##  Min.   : 0.600                                         
##  1st Qu.: 4.400                                         
##  Median : 5.900                                         
##  Mean   : 7.578                                         
##  3rd Qu.: 9.200                                         
##  Max.   :30.000                                         
##  NA's   :91                                             
##  Children.under.five.years.of.age.stunted.for.age....
##  Min.   : 1.20                                       
##  1st Qu.:18.18                                       
##  Median :31.25                                       
##  Mean   :30.91                                       
##  3rd Qu.:43.88                                       
##  Max.   :63.10                                       
##  NA's   :90                                          
##  Children.under.five.years.of.age.underweight.for.age....
##  Min.   : 1.100                                          
##  1st Qu.: 5.025                                          
##  Median :14.800                                          
##  Mean   :16.235                                          
##  3rd Qu.:23.650                                          
##  Max.   :43.500                                          
##  NA's   :90                                              
##  Newborns.with.low.birth.weight....
##  Min.   : 3.00                     
##  1st Qu.: 7.00                     
##  Median : 9.00                     
##  Mean   :10.76                     
##  3rd Qu.:13.75                     
##  Max.   :32.00                     
##  NA's   :28                        
##  Per.capita.recorded.alcohol.consumption..litres.of.pure.alcohol..among.adults...gt..15.years.
##  Min.   : 0.000                                                                               
##  1st Qu.: 1.230                                                                               
##  Median : 3.725                                                                               
##  Mean   : 4.519                                                                               
##  3rd Qu.: 6.982                                                                               
##  Max.   :15.560                                                                               
##  NA's   :22                                                                                   
##  Population.using.solid.fuels.....rural Population.using.solid.fuels.....urban
##  Min.   :  0.00                         Min.   : 0.00                         
##  1st Qu.: 34.75                         1st Qu.: 3.75                         
##  Median : 82.50                         Median :23.50                         
##  Mean   : 65.58                         Mean   :34.01                         
##  3rd Qu.: 98.00                         3rd Qu.:63.75                         
##  Max.   :100.00                         Max.   :99.00                         
##  NA's   :130                            NA's   :130                           
##  Population.with.sustainable.access.to.improved.drinking.water.sources.....rural
##  Min.   : 10.00                                                                 
##  1st Qu.: 61.00                                                                 
##  Median : 82.00                                                                 
##  Mean   : 77.21                                                                 
##  3rd Qu.: 97.00                                                                 
##  Max.   :100.00                                                                 
##  NA's   :23                                                                     
##  Population.with.sustainable.access.to.improved.drinking.water.sources.....total
##  Min.   : 22.00                                                                 
##  1st Qu.: 73.00                                                                 
##  Median : 91.00                                                                 
##  Mean   : 84.04                                                                 
##  3rd Qu.: 99.00                                                                 
##  Max.   :100.00                                                                 
##  NA's   :24                                                                     
##  Population.with.sustainable.access.to.improved.drinking.water.sources.....urban
##  Min.   : 37.00                                                                 
##  1st Qu.: 90.00                                                                 
##  Median : 98.00                                                                 
##  Mean   : 92.73                                                                 
##  3rd Qu.:100.00                                                                 
##  Max.   :100.00                                                                 
##  NA's   :17                                                                     
##  Population.with.sustainable.access.to.improved.sanitation.....rural
##  Min.   :  3.00                                                     
##  1st Qu.: 30.75                                                     
##  Median : 62.50                                                     
##  Mean   : 60.62                                                     
##  3rd Qu.: 95.00                                                     
##  Max.   :100.00                                                     
##  NA's   :30                                                         
##  Population.with.sustainable.access.to.improved.sanitation.....total
##  Min.   :  5.00                                                     
##  1st Qu.: 41.00                                                     
##  Median : 78.00                                                     
##  Mean   : 67.67                                                     
##  3rd Qu.: 95.00                                                     
##  Max.   :100.00                                                     
##  NA's   :33                                                         
##  Population.with.sustainable.access.to.improved.sanitation.....urban
##  Min.   : 14.00                                                     
##  1st Qu.: 58.50                                                     
##  Median : 88.00                                                     
##  Mean   : 77.03                                                     
##  3rd Qu.: 98.00                                                     
##  Max.   :100.00                                                     
##  NA's   :27                                                         
##  Prevalence.of.adults...gt..15.years..who.are.obese.....female
##  Min.   : 0.70                                                
##  1st Qu.: 6.00                                                
##  Median :12.50                                                
##  Mean   :15.19                                                
##  3rd Qu.:19.00                                                
##  Max.   :74.90                                                
##  NA's   :111                                                  
##  Prevalence.of.adults...gt..15.years..who.are.obese.....male
##  Min.   : 0.70                                              
##  1st Qu.: 8.35                                              
##  Median :13.25                                              
##  Mean   :15.72                                              
##  3rd Qu.:18.23                                              
##  Max.   :57.40                                              
##  NA's   :148                                                
##  Prevalence.of.condom.use.by.young.people..15.24.years..at.higher.risk.sex.....female
##  Min.   : 5.00                                                                       
##  1st Qu.:21.00                                                                       
##  Median :29.50                                                                       
##  Mean   :32.27                                                                       
##  3rd Qu.:41.75                                                                       
##  Max.   :75.00                                                                       
##  NA's   :172                                                                         
##  Prevalence.of.condom.use.by.young.people..15.24.years..at.higher.risk.sex.....male
##  Min.   :12.00                                                                     
##  1st Qu.:38.00                                                                     
##  Median :47.00                                                                     
##  Mean   :47.20                                                                     
##  3rd Qu.:54.75                                                                     
##  Max.   :88.00                                                                     
##  NA's   :172                                                                       
##  Prevalence.of.current.tobacco.use.among.adolescents..13.15.years......both.sexes
##  Min.   : 2.20                                                                   
##  1st Qu.:11.70                                                                   
##  Median :17.20                                                                   
##  Mean   :18.78                                                                   
##  3rd Qu.:23.80                                                                   
##  Max.   :59.70                                                                   
##  NA's   :65                                                                      
##  Prevalence.of.current.tobacco.use.among.adolescents..13.15.years......female
##  Min.   : 1.00                                                               
##  1st Qu.: 8.20                                                               
##  Median :13.20                                                               
##  Mean   :15.38                                                               
##  3rd Qu.:20.40                                                               
##  Max.   :54.10                                                               
##  NA's   :65                                                                  
##  Prevalence.of.current.tobacco.use.among.adolescents..13.15.years......male
##  Min.   : 3.20                                                             
##  1st Qu.:14.90                                                             
##  Median :20.50                                                             
##  Mean   :21.92                                                             
##  3rd Qu.:27.80                                                             
##  Max.   :65.80                                                             
##  NA's   :65                                                                
##  Prevalence.of.current.tobacco.use.among.adults...gt..15.years......both.sexes
##  Min.   : 4.30                                                                
##  1st Qu.:15.10                                                                
##  Median :25.60                                                                
##  Mean   :24.77                                                                
##  3rd Qu.:31.90                                                                
##  Max.   :51.80                                                                
##  NA's   :73                                                                   
##  Prevalence.of.current.tobacco.use.among.adults...gt..15.years......female
##  Min.   : 0.30                                                            
##  1st Qu.: 3.40                                                            
##  Median : 9.80                                                            
##  Mean   :14.05                                                            
##  3rd Qu.:24.50                                                            
##  Max.   :52.40                                                            
##  NA's   :71                                                               
##  Prevalence.of.current.tobacco.use.among.adults...gt..15.years......male
##  Min.   : 7.60                                                          
##  1st Qu.:25.95                                                          
##  Median :34.80                                                          
##  Mean   :35.40                                                          
##  3rd Qu.:44.15                                                          
##  Max.   :70.10                                                          
##  NA's   :71                                                             
##  Adolescent_fertility_rate Agricultural_land
##  Min.   :  1.46            Min.   : 0.58    
##  1st Qu.: 18.34            1st Qu.:23.95    
##  Median : 42.08            Median :40.21    
##  Mean   : 55.87            Mean   :40.39    
##  3rd Qu.: 80.68            3rd Qu.:57.94    
##  Max.   :225.50            Max.   :90.58    
##  NA's   :21                NA's   :17       
##  Agriculture_contribution_to_economy   Aid_given        Aid_received     
##  Min.   : 0.000                      Min.   :  212.0   Min.   : -65.940  
##  1st Qu.: 3.692                      1st Qu.:  766.2   1st Qu.:   8.275  
##  Median :10.160                      Median : 2118.0   Median :  35.320  
##  Mean   :15.074                      Mean   : 3614.2   Mean   :  86.615  
##  3rd Qu.:22.445                      3rd Qu.: 3833.5   3rd Qu.:  66.547  
##  Max.   :65.970                      Max.   :19705.0   Max.   :1514.510  
##  NA's   :24                          NA's   :180       NA's   :48        
##  Aid_received_total   All_forms_of_TB_new_cases_per_100_000_estimated
##  Min.   :-1.650e+08   Min.   :   3.00                                
##  1st Qu.: 4.728e+07   1st Qu.:  21.25                                
##  Median : 1.995e+08   Median :  61.00                                
##  Mean   : 5.640e+08   Mean   : 143.75                                
##  3rd Qu.: 5.440e+08   3rd Qu.: 204.00                                
##  Max.   : 2.210e+10   Max.   :1141.00                                
##  NA's   :38           NA's   :28                                     
##  All_forms_of_TB_new_cases_per_100_000_reported
##  Min.   :  0.00                                
##  1st Qu.: 13.25                                
##  Median : 41.00                                
##  Mean   : 83.83                                
##  3rd Qu.:100.75                                
##  Max.   :739.00                                
##  NA's   :28                                    
##  Annual_freshwater_withdrawals_total  Arms_exports        Arms_imports      
##  Min.   :   0.00                     Min.   :0.000e+00   Min.   :0.000e+00  
##  1st Qu.:   1.47                     1st Qu.:4.000e+06   1st Qu.:5.000e+06  
##  Median :   6.58                     Median :1.250e+07   Median :1.800e+07  
##  Mean   :  86.86                     Mean   :2.852e+08   Mean   :1.382e+08  
##  3rd Qu.:  34.19                     3rd Qu.:5.825e+07   3rd Qu.:9.600e+07  
##  Max.   :3794.44                     Max.   :7.090e+09   Max.   :3.790e+09  
##  NA's   :49                          NA's   :120         NA's   :37         
##  Bad_teeth_per_child Births_attended_by_skilled_health_staff
##  Min.   :0.180       Min.   :  5.70                         
##  1st Qu.:1.110       1st Qu.: 60.62                         
##  Median :1.700       Median : 91.65                         
##  Mean   :2.025       Mean   : 79.08                         
##  3rd Qu.:2.800       3rd Qu.: 99.40                         
##  Max.   :6.000       Max.   :100.00                         
##  NA's   :27          NA's   :28                             
##  Breast_cancer_deaths_per_100_000_women
##  Min.   : 2.00                         
##  1st Qu.:10.38                         
##  Median :14.20                         
##  Mean   :14.29                         
##  3rd Qu.:18.01                         
##  Max.   :29.60                         
##  NA's   :30                            
##  Breast_cancer_new_cases_per_100_000_women
##  Min.   :  3.90                           
##  1st Qu.: 20.55                           
##  Median : 29.90                           
##  Mean   : 37.21                           
##  3rd Qu.: 49.73                           
##  Max.   :101.10                           
##  NA's   :30                               
##  Breast_cancer_number_of_female_deaths Breast_cancer_number_of_new_female_cases
##  Min.   :    7                         Min.   :    16                          
##  1st Qu.:  156                         1st Qu.:   293                          
##  Median :  549                         Median :  1177                          
##  Mean   : 2385                         Mean   :  6689                          
##  3rd Qu.: 1598                         3rd Qu.:  3862                          
##  Max.   :44795                         Max.   :209995                          
##  NA's   :31                            NA's   :31                              
##  Broadband_subscribers Broadband_subscribers_per_100_people CO2_emissions    
##  Min.   :       0      Min.   : 0.000                       Min.   : 0.0100  
##  1st Qu.:     192      1st Qu.: 0.065                       1st Qu.: 0.6275  
##  Median :    8970      Median : 0.825                       Median : 2.3850  
##  Mean   : 1122746      Mean   : 4.453                       Mean   : 5.1146  
##  3rd Qu.:  168902      3rd Qu.: 4.225                       3rd Qu.: 7.0925  
##  Max.   :48000000      Max.   :29.080                       Max.   :57.7200  
##  NA's   :14            NA's   :50                           NA's   :16       
##  CO2_intensity_of_economic_output Capital_formation  
##  Min.   :0.010                    Min.   :0.000e+00  
##  1st Qu.:0.190                    1st Qu.:4.240e+09  
##  Median :0.310                    Median :6.170e+10  
##  Mean   :0.415                    Mean   :7.202e+12  
##  3rd Qu.:0.500                    3rd Qu.:3.700e+11  
##  Max.   :2.810                    Max.   :3.900e+14  
##  NA's   :29                       NA's   :41         
##  Cell_phones_per_100_people Cell_phones_total   Central_bank_discount_rate
##  Min.   :  0.30             Min.   :        0   Min.   :  0.00            
##  1st Qu.:  9.75             1st Qu.:   177250   1st Qu.:  4.00            
##  Median : 37.50             Median :   905542   Median :  7.00            
##  Mean   : 44.61             Mean   :  9165871   Mean   : 13.72            
##  3rd Qu.: 77.77             3rd Qu.:  4787298   3rd Qu.: 12.00            
##  Max.   :154.80             Max.   :335000000   Max.   :540.00            
##  NA's   :15                 NA's   :14          NA's   :61                
##  Cervical_cancer_deaths_per_100_000_women
##  Min.   : 0.640                          
##  1st Qu.: 3.507                          
##  Median : 8.050                          
##  Mean   :12.914                          
##  3rd Qu.:23.000                          
##  Max.   :55.600                          
##  NA's   :30                              
##  Cervical_cancer_new_cases_per_100_000_women
##  Min.   : 2.00                              
##  1st Qu.:10.88                              
##  Median :20.25                              
##  Mean   :23.18                              
##  3rd Qu.:30.43                              
##  Max.   :87.30                              
##  NA's   :30                                 
##  Cervical_cancer_number_of_female_deaths
##  Min.   :    3                          
##  1st Qu.:  111                          
##  Median :  354                          
##  Mean   : 1590                          
##  3rd Qu.: 1287                          
##  Max.   :74118                          
##  NA's   :31                             
##  Cervical_cancer_number_of_new_female_cases Children_and_elderly
##  Min.   :     5.0                           Min.   : 26.50      
##  1st Qu.:   220.5                           1st Qu.: 48.28      
##  Median :   654.0                           Median : 57.20      
##  Mean   :  2865.7                           Mean   : 62.84      
##  3rd Qu.:  1956.5                           3rd Qu.: 78.20      
##  Max.   :132082.0                           Max.   :107.67      
##  NA's   :31                                 NA's   :34          
##  Children_out_of_school_primary Children_out_of_school_primary_female
##  Min.   :     37                Min.   :      0                      
##  1st Qu.:   8656                1st Qu.:   2688                      
##  Median :  34216                Median :  15064                      
##  Mean   : 360415                Mean   : 202990                      
##  3rd Qu.: 223811                3rd Qu.: 110107                      
##  Max.   :8096824                Max.   :4712631                      
##  NA's   :38                     NA's   :40                           
##  Children_out_of_school_primary_male Children_per_woman Coal_consumption 
##  Min.   :      0                     Min.   :0.910      Min.   :   0.05  
##  1st Qu.:   2732                     1st Qu.:1.815      1st Qu.:   1.48  
##  Median :  15645                     Median :2.400      Median :   6.06  
##  Mean   : 167267                     Mean   :2.966      Mean   :  47.88  
##  3rd Qu.: 107337                     3rd Qu.:3.842      3rd Qu.:  26.68  
##  Max.   :3549652                     Max.   :7.190      Max.   :1088.80  
##  NA's   :38                          NA's   :34         NA's   :143      
##  Coal_consumption_per_person Coal_production    Coal_production_per_person
##  Min.   :0.0000              Min.   :   0.240   Min.   : 0.00             
##  1st Qu.:0.0550              1st Qu.:   4.138   1st Qu.: 0.15             
##  Median :0.3400              Median :  12.640   Median : 0.30             
##  Mean   :0.5215              Mean   :  88.892   Mean   : 0.97             
##  3rd Qu.:0.7800              3rd Qu.:  57.102   3rd Qu.: 0.90             
##  Max.   :2.7300              Max.   :1119.830   Max.   :10.28             
##  NA's   :144                 NA's   :170        NA's   :171               
##  Colon_and_Rectum_cancer_deaths_per_100_000_men
##  Min.   : 0.600                                
##  1st Qu.: 4.000                                
##  Median : 6.270                                
##  Mean   : 8.958                                
##  3rd Qu.:13.550                                
##  Max.   :34.070                                
##  NA's   :30                                    
##  Colon_and_Rectum_cancer_deaths_per_100_000_women
##  Min.   : 0.600                                  
##  1st Qu.: 3.075                                  
##  Median : 5.250                                  
##  Mean   : 6.198                                  
##  3rd Qu.: 9.207                                  
##  Max.   :17.040                                  
##  NA's   :30                                      
##  Colon_and_Rectum_cancer_new_cases_per_100_000_men
##  Min.   : 1.00                                    
##  1st Qu.: 5.10                                    
##  Median :10.25                                    
##  Mean   :16.20                                    
##  3rd Qu.:25.65                                    
##  Max.   :58.50                                    
##  NA's   :30                                       
##  Colon_and_Rectum_cancer_new_cases_per_100_000_women
##  Min.   : 0.90                                      
##  1st Qu.: 3.80                                      
##  Median : 8.10                                      
##  Mean   :12.03                                      
##  3rd Qu.:18.12                                      
##  Max.   :42.20                                      
##  NA's   :30                                         
##  Colon_and_Rectum_cancer_number_of_female_deaths
##  Min.   :    0.0                                
##  1st Qu.:   51.5                                
##  Median :  193.0                                
##  Mean   : 1453.9                                
##  3rd Qu.:  815.0                                
##  Max.   :35902.0                                
##  NA's   :31                                     
##  Colon_and_Rectum_cancer_number_of_male_deaths
##  Min.   :    2.0                              
##  1st Qu.:   58.5                              
##  Median :  216.0                              
##  Mean   : 1614.7                              
##  3rd Qu.:  980.5                              
##  Max.   :50200.0                              
##  NA's   :31                                   
##  Colon_and_Rectum_cancer_number_of_new_female_cases
##  Min.   :    1                                     
##  1st Qu.:   68                                     
##  Median :  282                                     
##  Mean   : 2746                                     
##  3rd Qu.: 1422                                     
##  Max.   :80427                                     
##  NA's   :31                                        
##  Colon_and_Rectum_cancer_number_of_new_male_cases Consumer_price_index
##  Min.   :    2.0                                  Min.   :  80.0      
##  1st Qu.:   69.5                                  1st Qu.: 112.0      
##  Median :  339.0                                  Median : 119.0      
##  Mean   : 3195.5                                  Mean   : 150.4      
##  3rd Qu.: 1524.0                                  3rd Qu.: 142.0      
##  Max.   :88142.0                                  Max.   :1873.0      
##  NA's   :31                                       NA's   :45          
##  Contraceptive_use Deaths_from_TB_per_100_000_estimated Debt_servicing_costs
##  Min.   : 2.80     Min.   :  0.00                       Min.   : 1.000      
##  1st Qu.:30.40     1st Qu.:  3.00                       1st Qu.: 4.000      
##  Median :49.60     Median :  8.50                       Median : 7.000      
##  Mean   :47.77     Mean   : 27.53                       Mean   : 8.923      
##  3rd Qu.:66.97     3rd Qu.: 37.00                       3rd Qu.:11.000      
##  Max.   :96.00     Max.   :271.00                       Max.   :71.000      
##  NA's   :40        NA's   :28                           NA's   :72          
##  Democracy_score   Electric_power_consumption Electricity_generation
##  Min.   :-10.000   Min.   :   34.39           Min.   :   8.68       
##  1st Qu.: -3.000   1st Qu.:  717.84           1st Qu.:  44.05       
##  Median :  6.000   Median : 2116.55           Median :  91.16       
##  Mean   :  3.355   Mean   : 4021.08           Mean   : 276.55       
##  3rd Qu.:  9.000   3rd Qu.: 5726.85           3rd Qu.: 230.22       
##  Max.   : 10.000   Max.   :27986.52           Max.   :4257.37       
##  NA's   :47        NA's   :71                 NA's   :139           
##  Electricity_generation_per_person   Energy_use     
##  Min.   :  156.9                   Min.   :  157.8  
##  1st Qu.: 2540.4                   1st Qu.:  632.2  
##  Median : 5548.2                   Median : 1444.7  
##  Mean   : 6990.5                   Mean   : 2702.9  
##  3rd Qu.: 8514.2                   3rd Qu.: 3635.9  
##  Max.   :30061.3                   Max.   :19877.3  
##  NA's   :140                       NA's   :71       
##  Expenditure_per_student_primary Expenditure_per_student_secondary
##  Min.   : 0.91                   Min.   :  2.44                   
##  1st Qu.: 9.37                   1st Qu.: 13.01                   
##  Median :14.07                   Median : 19.74                   
##  Mean   :14.59                   Mean   : 21.86                   
##  3rd Qu.:19.19                   3rd Qu.: 26.37                   
##  Max.   :37.26                   Max.   :100.87                   
##  NA's   :55                      NA's   :65                       
##  Expenditure_per_student_tertiary Exports_of_goods_and_services
##  Min.   :   0.00                  Min.   :  0.42               
##  1st Qu.:  23.50                  1st Qu.: 26.04               
##  Median :  36.60                  Median : 37.98               
##  Mean   :  97.75                  Mean   : 44.12               
##  3rd Qu.:  90.07                  3rd Qu.: 54.87               
##  Max.   :1145.67                  Max.   :244.30               
##  NA's   :77                       NA's   :20                   
##  Exports_unit_value External_debt_total_DOD_current_USdollars
##  Min.   : 50.81     Min.   :8.180e+07                        
##  1st Qu.: 96.06     1st Qu.:1.575e+09                        
##  Median :115.92     Median :4.640e+09                        
##  Mean   :118.28     Mean   :2.079e+10                        
##  3rd Qu.:139.06     3rd Qu.:1.695e+10                        
##  Max.   :205.80     Max.   :2.820e+11                        
##  NA's   :135        NA's   :71                               
##  External_debt_total_pct_of_GNI Female_labour_force
##  Min.   :  4.91                 Min.   :13.10      
##  1st Qu.: 33.54                 1st Qu.:36.96      
##  Median : 51.58                 Median :41.99      
##  Mean   : 65.10                 Mean   :40.24      
##  3rd Qu.: 76.17                 3rd Qu.:46.19      
##  Max.   :619.18                 Max.   :53.45      
##  NA's   :73                     NA's   :36         
##  Fixed_line_and_mobile_phone_subscribers Foreign_direct_investment_net_inflows
##  Min.   :  0.28                          Min.   :-15.130                      
##  1st Qu.: 12.57                          1st Qu.:  1.165                      
##  Median : 52.05                          Median :  3.000                      
##  Mean   : 62.80                          Mean   :  6.247                      
##  3rd Qu.:111.94                          3rd Qu.:  6.245                      
##  Max.   :211.17                          Max.   :312.670                      
##  NA's   :14                              NA's   :31                           
##  Foreign_direct_investment_net_outflows  Forest_area     
##  Min.   : -5.710                        Min.   :     10  
##  1st Qu.:  0.000                        1st Qu.:   4090  
##  Median :  0.100                        Median :  27540  
##  Mean   :  3.395                        Mean   : 215241  
##  3rd Qu.:  1.030                        3rd Qu.: 110825  
##  Max.   :332.380                        Max.   :8087900  
##  NA's   :41                             NA's   :19       
##  Gross_capital_formation  HIV_infected    Health_expenditure_per_person
##  Min.   : 8.07           Min.   : 0.010   Min.   :   0.23              
##  1st Qu.:18.38           1st Qu.: 0.110   1st Qu.:  33.25              
##  Median :22.41           Median : 0.500   Median : 157.00              
##  Mean   :23.77           Mean   : 2.085   Mean   : 693.04              
##  3rd Qu.:27.71           3rd Qu.: 1.530   3rd Qu.: 486.75              
##  Max.   :63.23           Max.   :26.350   Max.   :6657.00              
##  NA's   :22              NA's   :55       NA's   :22                   
##  Health_expenditure_private Health_expenditure_public_pct_of_GDP
##  Min.   :0.330              Min.   : 0.260                      
##  1st Qu.:1.490              1st Qu.: 1.962                      
##  Median :2.275              Median : 3.190                      
##  Mean   :2.463              Mean   : 3.627                      
##  3rd Qu.:3.192              3rd Qu.: 4.680                      
##  Max.   :8.330              Max.   :11.730                      
##  NA's   :22                 NA's   :22                          
##  Health_expenditure_public_pct_of_government_expenditure
##  Min.   : 0.70                                          
##  1st Qu.: 7.00                                          
##  Median :10.25                                          
##  Mean   :10.56                                          
##  3rd Qu.:13.53                                          
##  Max.   :36.30                                          
##  NA's   :22                                             
##  Health_expenditure_public_pct_of_total_health_expenditure
##  Min.   :11.60                                            
##  1st Qu.:44.05                                            
##  Median :60.55                                            
##  Mean   :57.67                                            
##  3rd Qu.:73.03                                            
##  Max.   :92.40                                            
##  NA's   :22                                               
##  Health_expenditure_total High_technology_exports Hydroelectricity_consumption
##  Min.   : 1.700           Min.   : 0.000          Min.   : 0.13               
##  1st Qu.: 4.275           1st Qu.: 0.835          1st Qu.: 1.22               
##  Median : 5.600           Median : 4.755          Median : 2.94               
##  Mean   : 6.076           Mean   : 9.480          Mean   :11.04               
##  3rd Qu.: 7.600           3rd Qu.:12.730          3rd Qu.: 8.62               
##  Max.   :15.900           Max.   :70.730          Max.   :89.84               
##  NA's   :22               NA's   :36              NA's   :147                 
##  Hydroelectricity_consumption_per_person Imports_of_goods_and_services
##  Min.   :0.0000                          Min.   :  1.05               
##  1st Qu.:0.0425                          1st Qu.: 31.10               
##  Median :0.1200                          Median : 43.18               
##  Mean   :0.4782                          Mean   : 49.30               
##  3rd Qu.:0.2100                          3rd Qu.: 62.16               
##  Max.   :6.7300                          Max.   :214.82               
##  NA's   :148                             NA's   :20                   
##  Imports_unit_value Improved_sanitation_facilities_urban Improved_water_source
##  Min.   : 47.0      Min.   : 24.00                       Min.   : 22.00       
##  1st Qu.:108.0      1st Qu.: 59.00                       1st Qu.: 67.00       
##  Median :121.0      Median : 79.50                       Median : 85.00       
##  Mean   :123.5      Mean   : 76.63                       Mean   : 80.36       
##  3rd Qu.:137.5      3rd Qu.: 97.00                       3rd Qu.: 97.00       
##  Max.   :330.0      Max.   :100.00                       Max.   :100.00       
##  NA's   :151        NA's   :40                           NA's   :32           
##  Income_growth    Income_per_person Income_share_held_by_lowest_20pct
##  Min.   :-7.500   Min.   :  264     Min.   : 1.400                   
##  1st Qu.: 1.137   1st Qu.: 1985     1st Qu.: 4.805                   
##  Median : 3.235   Median : 6461     Median : 6.255                   
##  Mean   : 3.449   Mean   :11865     Mean   : 6.139                   
##  3rd Qu.: 4.680   3rd Qu.:16268     3rd Qu.: 7.465                   
##  Max.   :24.970   Max.   :70014     Max.   :10.580                   
##  NA's   :32       NA's   :11        NA's   :72                       
##  Industry_contribution_to_economy Inequality_index Infant_mortality_rate
##  Min.   : 7.09                    Min.   :24.70    Min.   :  2.00       
##  1st Qu.:22.79                    1st Qu.:34.00    1st Qu.:  8.50       
##  Median :28.29                    Median :39.45    Median : 24.00       
##  Mean   :30.81                    Mean   :40.74    Mean   : 40.47       
##  3rd Qu.:35.09                    3rd Qu.:47.01    3rd Qu.: 64.00       
##  Max.   :94.21                    Max.   :74.33    Max.   :165.00       
##  NA's   :26                       NA's   :72       NA's   :35           
##  Infectious_TB_new_cases_per_100_000_estimated
##  Min.   :  2.0                                
##  1st Qu.: 10.0                                
##  Median : 28.0                                
##  Mean   : 63.0                                
##  3rd Qu.: 93.5                                
##  Max.   :452.0                                
##  NA's   :27                                   
##  Infectious_TB_new_cases_per_100_000_reported
##  Min.   :  0.00                              
##  1st Qu.:  7.00                              
##  Median : 21.00                              
##  Mean   : 35.82                              
##  3rd Qu.: 47.50                              
##  Max.   :262.00                              
##  NA's   :27                                  
##  Infectious_TB_treatment_completeness Inflation_GDP_deflator Internet_users 
##  Min.   : 28                          Min.   : -8.390        Min.   : 0.00  
##  1st Qu.: 72                          1st Qu.:  2.592        1st Qu.: 2.90  
##  Median : 80                          Median :  5.495        Median : 8.40  
##  Mean   : 78                          Mean   : 10.490        Mean   :17.78  
##  3rd Qu.: 86                          3rd Qu.: 11.915        3rd Qu.:27.10  
##  Max.   :100                          Max.   :237.950        Max.   :76.20  
##  NA's   :44                           NA's   :16             NA's   :15     
##  Life_expectancy_at_birth Literacy_rate_adult_female Literacy_rate_adult_male
##  Min.   :40.68            Min.   :12.59              Min.   :31.44           
##  1st Qu.:59.47            1st Qu.:59.66              1st Qu.:74.00           
##  Median :71.33            Median :80.79              Median :88.32           
##  Mean   :67.47            Mean   :73.54              Mean   :82.68           
##  3rd Qu.:76.02            3rd Qu.:93.69              3rd Qu.:95.93           
##  Max.   :82.27            Max.   :99.79              Max.   :99.81           
##  NA's   :34               NA's   :66                 NA's   :66              
##  Literacy_rate_adult_total Literacy_rate_youth_female Literacy_rate_youth_male
##  Min.   :23.55             Min.   :16.86              Min.   :32.25           
##  1st Qu.:67.45             1st Qu.:71.08              1st Qu.:80.89           
##  Median :84.68             Median :95.45              Median :95.72           
##  Mean   :77.98             Mean   :83.31              Mean   :87.92           
##  3rd Qu.:93.83             3rd Qu.:98.84              3rd Qu.:98.88           
##  Max.   :99.80             Max.   :99.95              Max.   :99.96           
##  NA's   :65                NA's   :70                 NA's   :70              
##  Literacy_rate_youth_total Liver_cancer_deaths_per_100_000_men
##  Min.   :24.19             Min.   : 0.710                     
##  1st Qu.:75.81             1st Qu.: 4.015                     
##  Median :95.59             Median : 6.030                     
##  Mean   :85.53             Mean   :10.748                     
##  3rd Qu.:98.87             3rd Qu.:14.425                     
##  Max.   :99.96             Max.   :93.300                     
##  NA's   :70                NA's   :30                         
##  Liver_cancer_deaths_per_100_000_women Liver_cancer_new_cases_per_100_000_men
##  Min.   : 0.200                        Min.   : 0.80                         
##  1st Qu.: 1.900                        1st Qu.: 3.70                         
##  Median : 3.170                        Median : 6.10                         
##  Mean   : 4.738                        Mean   :11.39                         
##  3rd Qu.: 5.500                        3rd Qu.:15.30                         
##  Max.   :47.300                        Max.   :98.90                         
##  NA's   :30                            NA's   :30                            
##  Liver_cancer_new_cases_per_100_000_women Liver_cancer_number_of_female_deaths
##  Min.   : 0.200                           Min.   :    0.5                     
##  1st Qu.: 1.900                           1st Qu.:   41.5                     
##  Median : 3.000                           Median :  147.0                     
##  Mean   : 4.916                           Mean   : 1050.1                     
##  3rd Qu.: 5.600                           3rd Qu.:  408.5                     
##  Max.   :57.300                           Max.   :89055.0                     
##  NA's   :30                               NA's   :31                          
##  Liver_cancer_number_of_male_deaths Liver_cancer_number_of_new_female_cases
##  Min.   :     2                     Min.   :    0                          
##  1st Qu.:    70                     1st Qu.:   41                          
##  Median :   253                     Median :  125                          
##  Mean   :  2410                     Mean   : 1065                          
##  3rd Qu.:   819                     3rd Qu.:  423                          
##  Max.   :232796                     Max.   :94937                          
##  NA's   :31                         NA's   :31                             
##  Liver_cancer_number_of_new_male_cases Lung_cancer_deaths_per_100_000_men
##  Min.   :     2.0                      Min.   : 0.400                    
##  1st Qu.:    67.5                      1st Qu.: 6.435                    
##  Median :   244.0                      Median :16.020                    
##  Mean   :  2554.0                      Mean   :21.095                    
##  3rd Qu.:   799.0                      3rd Qu.:31.457                    
##  Max.   :250907.0                      Max.   :78.660                    
##  NA's   :31                            NA's   :30                        
##  Lung_cancer_deaths_per_100_000_women Lung_cancer_new_cases_per_100_000_men
##  Min.   : 0.100                       Min.   : 0.500                       
##  1st Qu.: 2.100                       1st Qu.: 7.425                       
##  Median : 4.515                       Median :18.750                       
##  Mean   : 5.719                       Mean   :25.281                       
##  3rd Qu.: 7.405                       3rd Qu.:39.850                       
##  Max.   :27.800                       Max.   :94.600                       
##  NA's   :30                           NA's   :30                           
##  Lung_cancer_new_cases_per_100_000_women Lung_cancer_number_of_female_deaths
##  Min.   : 0.100                          Min.   :     0                     
##  1st Qu.: 2.175                          1st Qu.:    21                     
##  Median : 5.300                          Median :   126                     
##  Mean   : 6.863                          Mean   :  1917                     
##  3rd Qu.: 9.400                          3rd Qu.:   742                     
##  Max.   :36.100                          Max.   :109059                     
##  NA's   :30                              NA's   :31                         
##  Lung_cancer_number_of_male_deaths Lung_cancer_number_of_new_female_cases
##  Min.   :     1                    Min.   :     0                        
##  1st Qu.:    79                    1st Qu.:    22                        
##  Median :   441                    Median :   134                        
##  Mean   :  4905                    Mean   :  2242                        
##  3rd Qu.:  2108                    3rd Qu.:   794                        
##  Max.   :231301                    Max.   :126718                        
##  NA's   :31                        NA's   :31                            
##  Lung_cancer_number_of_new_male_cases
##  Min.   :     1.0                    
##  1st Qu.:    82.5                    
##  Median :   483.0                    
##  Mean   :  5579.2                    
##  3rd Qu.:  2414.5                    
##  Max.   :269650.0                    
##  NA's   :31                          
##  Malaria_prevention_insecticide_treated_bed_nets_usage Malaria_treatment
##  Min.   : 0.100                                        Min.   : 0.70    
##  1st Qu.: 1.200                                        1st Qu.:13.05    
##  Median : 3.050                                        Median :35.90    
##  Mean   : 6.485                                        Mean   :35.61    
##  3rd Qu.: 7.400                                        3rd Qu.:57.23    
##  Max.   :53.600                                        Max.   :68.90    
##  NA's   :156                                           NA's   :156      
##  Malnutrition_weight_for_age Market_value_of_listed_companies
##  Min.   : 1.10               Min.   :  0.09                  
##  1st Qu.: 6.10               1st Qu.: 15.92                  
##  Median :15.30               Median : 35.48                  
##  Mean   :16.79               Mean   : 63.37                  
##  3rd Qu.:24.40               3rd Qu.: 87.94                  
##  Max.   :47.60               Max.   :593.25                  
##  NA's   :101                 NA's   :84                      
##  Maternal_mortality Math_achievement_4th_grade Math_achievement_8th_grade
##  Min.   :   0.00    Min.   :331.0              Min.   :298.0             
##  1st Qu.:  25.13    1st Qu.:494.1              1st Qu.:443.9             
##  Median : 100.00    Median :520.8              Median :483.3             
##  Mean   : 305.62    Mean   :509.2              Mean   :477.1             
##  3rd Qu.: 480.00    3rd Qu.:535.9              3rd Qu.:506.7             
##  Max.   :2000.00    Max.   :597.3              Max.   :597.0             
##  NA's   :25         NA's   :182                NA's   :172               
##  Measles_immunization Medical_Doctors  Merchandise_trade Military_expenditure
##  Min.   :23.00        Min.   :0.0200   Min.   : 12.97    Min.   : 0.000      
##  1st Qu.:79.00        1st Qu.:0.2075   1st Qu.: 47.07    1st Qu.: 1.060      
##  Median :90.00        Median :1.1000   Median : 63.97    Median : 1.620      
##  Mean   :84.94        Mean   :1.4057   Mean   : 73.86    Mean   : 2.271      
##  3rd Qu.:97.00        3rd Qu.:2.3100   3rd Qu.: 88.86    3rd Qu.: 2.630      
##  Max.   :99.00        Max.   :5.9100   Max.   :368.19    Max.   :24.110      
##  NA's   :21           NA's   :30       NA's   :21        NA's   :47          
##  Natural_gas_consumption Natural_gas_consumption_per_person
##  Min.   :  0.260         Min.   :   19.46                  
##  1st Qu.:  6.162         1st Qu.:  423.87                  
##  Median : 18.150         Median : 1021.80                  
##  Mean   : 47.488         Mean   : 1942.36                  
##  3rd Qu.: 41.945         3rd Qu.: 1586.02                  
##  Max.   :623.280         Max.   :21667.32                  
##  NA's   :154             NA's   :155                       
##  Natural_gas_production Natural_gas_production_per_person
##  Min.   :  3.55         Min.   :   27.42                 
##  1st Qu.: 11.93         1st Qu.:  280.22                 
##  Median : 25.90         Median : 1248.79                 
##  Mean   : 58.89         Mean   : 4490.66                 
##  3rd Qu.: 57.82         3rd Qu.: 2708.02                 
##  Max.   :597.96         Max.   :53067.55                 
##  NA's   :156            NA's   :156                      
##  Natural_gas_proved_reserves Natural_gas_proven_reserves_per_person
##  Min.   : 0.090              Min.   :    1.02                      
##  1st Qu.: 0.410              1st Qu.:    7.96                      
##  Median : 1.100              Median :   40.03                      
##  Mean   : 3.503              Mean   :  775.75                      
##  3rd Qu.: 2.480              3rd Qu.:  159.17                      
##  Max.   :44.610              Max.   :29703.92                      
##  NA's   :153                 NA's   :153                           
##  Net_barter_terms_of_trade Nuclear_consumption Nuclear_consumption_per_person
##  Min.   : 77.22            Min.   :  0.070     Min.   :0.0000                
##  1st Qu.: 92.74            1st Qu.:  2.575     1st Qu.:0.0600                
##  Median :101.93            Median :  5.530     Median :0.4000                
##  Mean   :110.11            Mean   : 20.836     Mean   :0.4628                
##  3rd Qu.:122.72            3rd Qu.: 19.685     3rd Qu.:0.6500                
##  Max.   :181.79            Max.   :186.260     Max.   :1.8200                
##  NA's   :66                NA's   :172         NA's   :173                   
##  Number_of_deaths_from_TB_estimated Number_of_existing_TB_cases_estimated
##  Min.   :     0                     Min.   :      4                      
##  1st Qu.:    55                     1st Qu.:    562                      
##  Median :   733                     Median :   6025                      
##  Mean   :  9275                     Mean   :  80633                      
##  3rd Qu.:  4440                     3rd Qu.:  38912                      
##  Max.   :330695                     Max.   :3511772                      
##  NA's   :27                         NA's   :28                           
##  Oil_consumption    Oil_consumption_per_person Oil_production    
##  Min.   :   20.82   Min.   : 0.240             Min.   :   72.68  
##  1st Qu.:  207.11   1st Qu.: 4.253             1st Qu.:  295.00  
##  Median :  330.79   Median : 7.825             Median :  734.44  
##  Mean   : 1200.18   Mean   :12.133             Mean   : 1670.16  
##  3rd Qu.: 1231.80   3rd Qu.:15.545             3rd Qu.: 2155.64  
##  Max.   :20802.18   Max.   :65.510             Max.   :11114.43  
##  NA's   :137        NA's   :138                NA's   :154       
##  Oil_production_per_person Oil_proved_reserves Oil_proven_reserves_per_person
##  Min.   :0.00000           Min.   :  0.45      Min.   :    5.48              
##  1st Qu.:0.00000           1st Qu.:  1.86      1st Qu.:   55.56              
##  Median :0.01000           Median :  5.25      Median :  219.26              
##  Mean   :0.05794           Mean   : 25.84      Mean   : 3411.56              
##  3rd Qu.:0.03250           3rd Qu.: 21.97      3rd Qu.: 1938.87              
##  Max.   :0.44000           Max.   :264.21      Max.   :43456.89              
##  NA's   :154               NA's   :155         NA's   :155                   
##  Old_version_of_Income_per_person Patent_applications Patents_granted    
##  Min.   :  225.7                  Min.   :     1      Min.   :     0.38  
##  1st Qu.: 1512.1                  1st Qu.:  1036      1st Qu.:    35.25  
##  Median : 4472.3                  Median : 89636      Median :   240.50  
##  Mean   : 8048.3                  Mean   : 93929      Mean   :  6791.59  
##  3rd Qu.:11950.1                  3rd Qu.:157931      3rd Qu.:  2243.25  
##  Max.   :43696.8                  Max.   :486906      Max.   :167334.00  
##  NA's   :28                       NA's   :59          NA's   :68         
##  Patents_in_force  People_living_with_HIV Personal_computers_per_100_people
##  Min.   :      4   Min.   :     17        Min.   : 0.10                    
##  1st Qu.:    349   1st Qu.:   5735        1st Qu.: 1.40                    
##  Median :   1324   Median :  18492        Median : 5.50                    
##  Mean   :  48481   Mean   : 219519        Mean   :14.89                    
##  3rd Qu.:  17852   3rd Qu.: 127032        3rd Qu.:17.52                    
##  Max.   :1474028   Max.   :5560376        Max.   :82.30                    
##  NA's   :90        NA's   :55             NA's   :36                       
##  Personal_computers_total Population_growth
##  Min.   :      600        Min.   :-1.240   
##  1st Qu.:    45000        1st Qu.: 0.640   
##  Median :   281500        Median : 1.350   
##  Mean   :  4573835        Mean   : 1.423   
##  3rd Qu.:  2143232        3rd Qu.: 2.200   
##  Max.   :224000000        Max.   : 4.090   
##  NA's   :36               NA's   :15       
##  Population_in_urban_agglomerations_more_than_1_million Population_total   
##  Min.   :  3.85                                         Min.   :6.536e+04  
##  1st Qu.: 12.86                                         1st Qu.:2.247e+06  
##  Median : 20.27                                         Median :7.309e+06  
##  Mean   : 24.45                                         Mean   :3.468e+07  
##  3rd Qu.: 31.13                                         3rd Qu.:2.275e+07  
##  Max.   :103.34                                         Max.   :1.300e+09  
##  NA's   :98                                             NA's   :28         
##  Poverty_headcount_ratio_at_national_poverty_line Present_value_of_debt
##  Min.   : 4.60                                    Mode:logical         
##  1st Qu.:21.25                                    NA's:202             
##  Median :35.00                                                         
##  Mean   :37.47                                                         
##  3rd Qu.:51.45                                                         
##  Max.   :74.90                                                         
##  NA's   :115                                                           
##  Primary_completion_rate_total Primary_energy_consumption
##  Min.   : 24.29                Min.   :   2.70           
##  1st Qu.: 72.03                1st Qu.:  25.22           
##  Median : 92.88                Median :  54.11           
##  Mean   : 84.31                Mean   : 155.05           
##  3rd Qu.: 99.37                3rd Qu.: 140.03           
##  Max.   :138.16                Max.   :2342.71           
##  NA's   :30                    NA's   :137               
##  Primary_energy_consumption_per_person Primary_school_completion_pct_of_boys
##  Min.   : 0.130                        Min.   : 24.75                       
##  1st Qu.: 1.735                        1st Qu.: 70.75                       
##  Median : 3.175                        Median : 91.00                       
##  Mean   : 4.057                        Mean   : 82.77                       
##  3rd Qu.: 4.575                        3rd Qu.: 98.00                       
##  Max.   :22.720                        Max.   :114.00                       
##  NA's   :138                           NA's   :55                           
##  Primary_school_completion_pct_of_girls Prostate_cancer_deaths_per_100_000_men
##  Min.   : 18.00                         Min.   : 0.20                         
##  1st Qu.: 64.60                         1st Qu.: 5.35                         
##  Median : 91.00                         Median :11.46                         
##  Mean   : 80.27                         Mean   :12.06                         
##  3rd Qu.: 98.00                         3rd Qu.:16.48                         
##  Max.   :114.00                         Max.   :55.30                         
##  NA's   :55                             NA's   :30                            
##  Prostate_cancer_new_cases_per_100_000_men
##  Min.   :  0.300                          
##  1st Qu.:  8.075                          
##  Median : 19.150                          
##  Mean   : 26.043                          
##  3rd Qu.: 36.500                          
##  Max.   :124.800                          
##  NA's   :30                               
##  Prostate_cancer_number_of_male_deaths Prostate_cancer_number_of_new_male_cases
##  Min.   :    1                         Min.   :     1                          
##  1st Qu.:   83                         1st Qu.:   130                          
##  Median :  274                         Median :   416                          
##  Mean   : 1281                         Mean   :  3946                          
##  3rd Qu.: 1074                         3rd Qu.:  1970                          
##  Max.   :32442                         Max.   :239930                          
##  NA's   :31                            NA's   :31                              
##  Pump_price_for_gasoline
##  Min.   :0.0000         
##  1st Qu.:0.6600         
##  Median :0.8800         
##  Mean   :0.9074         
##  3rd Qu.:1.2000         
##  Max.   :1.9000         
##  NA's   :33             
##  Ratio_of_girls_to_boys_in_primary_and_secondary_education
##  Min.   : 48.12                                           
##  1st Qu.: 93.33                                           
##  Median : 99.08                                           
##  Mean   : 94.80                                           
##  3rd Qu.:101.47                                           
##  Max.   :112.56                                           
##  NA's   :21                                               
##  Ratio_of_young_literate_females_to_males  Roads_paved    
##  Min.   : 36.20                           Min.   :  0.80  
##  1st Qu.: 89.73                           1st Qu.: 18.23  
##  Median : 99.70                           Median : 44.45  
##  Mean   : 91.64                           Mean   : 49.31  
##  3rd Qu.:100.11                           3rd Qu.: 81.39  
##  Max.   :106.36                           Max.   :100.00  
##  NA's   :70                               NA's   :22      
##  SO2_emissions_per_person Services_contribution_to_economy
##  Min.   :  0.160          Min.   : 2.94                   
##  1st Qu.:  1.798          1st Qu.:43.97                   
##  Median :  4.950          Median :55.65                   
##  Mean   : 11.638          Mean   :54.14                   
##  3rd Qu.: 15.178          3rd Qu.:65.71                   
##  Max.   :106.610          Max.   :90.63                   
##  NA's   :62               NA's   :26                      
##  Stomach_cancer_deaths_per_100_000_men Stomach_cancer_deaths_per_100_000_women
##  Min.   : 0.500                        Min.   : 0.500                         
##  1st Qu.: 5.082                        1st Qu.: 3.000                         
##  Median : 8.100                        Median : 5.070                         
##  Mean   :10.653                        Mean   : 5.799                         
##  3rd Qu.:15.262                        3rd Qu.: 7.612                         
##  Max.   :37.100                        Max.   :24.100                         
##  NA's   :30                            NA's   :30                             
##  Stomach_cancer_new_cases_per_100_000_men
##  Min.   : 0.600                          
##  1st Qu.: 6.525                          
##  Median :12.350                          
##  Mean   :14.852                          
##  3rd Qu.:19.800                          
##  Max.   :69.700                          
##  NA's   :30                              
##  Stomach_cancer_new_cases_per_100_000_women
##  Min.   : 0.600                            
##  1st Qu.: 3.675                            
##  Median : 6.400                            
##  Mean   : 7.997                            
##  3rd Qu.: 9.925                            
##  Max.   :30.600                            
##  NA's   :30                                
##  Stomach_cancer_number_of_female_deaths Stomach_cancer_number_of_male_deaths
##  Min.   :     3.0                       Min.   :     2.0                    
##  1st Qu.:    47.5                       1st Qu.:    57.0                    
##  Median :   214.0                       Median :   302.0                    
##  Mean   :  1477.3                       Mean   :  2589.9                    
##  3rd Qu.:   626.5                       3rd Qu.:   945.5                    
##  Max.   :101719.0                       Max.   :206632.0                    
##  NA's   :31                             NA's   :31                          
##  Stomach_cancer_number_of_new_female_cases
##  Min.   :     3                           
##  1st Qu.:    58                           
##  Median :   252                           
##  Mean   :  1921                           
##  3rd Qu.:   736                           
##  Max.   :128478                           
##  NA's   :31                               
##  Stomach_cancer_number_of_new_male_cases Sugar_per_person  Surface_area     
##  Min.   :     2                          Min.   :  5.48   Min.   :      28  
##  1st Qu.:    65                          1st Qu.: 41.10   1st Qu.:   27398  
##  Median :   372                          Median : 87.67   Median :  130980  
##  Mean   :  3505                          Mean   : 82.37   Mean   :  709454  
##  3rd Qu.:  1132                          3rd Qu.:115.07   3rd Qu.:  533852  
##  Max.   :264460                          Max.   :191.78   Max.   :17100000  
##  NA's   :31                              NA's   :29       NA's   :14        
##   Tax_revenue    Total_CO2_emissions  Total_income       Total_reserves   
##  Min.   : 0.98   Min.   :     26     Min.   :5.190e+07   Min.   :   0.99  
##  1st Qu.:12.11   1st Qu.:   1673     1st Qu.:3.318e+09   1st Qu.:  16.29  
##  Median :16.72   Median :  10212     Median :1.145e+10   Median :  28.52  
##  Mean   :17.24   Mean   : 148360     Mean   :2.016e+11   Mean   :  57.25  
##  3rd Qu.:21.65   3rd Qu.:  65492     3rd Qu.:8.680e+10   3rd Qu.:  55.31  
##  Max.   :44.34   Max.   :5776432     Max.   :1.100e+13   Max.   :1334.86  
##  NA's   :65      NA's   :16          NA's   :24          NA's   :74       
##  Trade_balance_goods_and_services Under_five_mortality_from_CME
##  Min.   :-7.140e+11               Min.   :  2.90               
##  1st Qu.:-1.210e+09               1st Qu.: 12.40               
##  Median :-2.240e+08               Median : 29.98               
##  Mean   : 3.424e+08               Mean   : 56.68               
##  3rd Qu.: 1.024e+09               3rd Qu.: 88.70               
##  Max.   : 1.390e+11               Max.   :267.00               
##  NA's   :31                       NA's   :21                   
##  Under_five_mortality_from_IHME Under_five_mortality_rate Urban_population   
##  Min.   :  3.000                Min.   :  2.90            Min.   :    15456  
##  1st Qu.:  8.475                1st Qu.: 12.40            1st Qu.:   917162  
##  Median : 27.600                Median : 29.98            Median :  3427661  
##  Mean   : 54.356                Mean   : 56.68            Mean   : 16657627  
##  3rd Qu.: 82.900                3rd Qu.: 88.70            3rd Qu.:  9837113  
##  Max.   :253.700                Max.   :267.00            Max.   :527000000  
##  NA's   :32                     NA's   :21                NA's   :14         
##  Urban_population_growth Urban_population_pct_of_total
##  Min.   :-1.160          Min.   : 10.00               
##  1st Qu.: 1.105          1st Qu.: 35.65               
##  Median : 1.945          Median : 57.30               
##  Mean   : 2.166          Mean   : 55.20               
##  3rd Qu.: 3.252          3rd Qu.: 72.75               
##  Max.   : 7.850          Max.   :100.00               
##  NA's   :14              NA's   :14
# Print per-column summary statistics for the economic freedom index data.
# NOTE(review): in the output below almost every column is reported as
# character (Length/Class/Mode) rather than with numeric quantiles; only
# CountryID and Population..Millions. get Min/Median/Mean rows. Presumably the
# score columns need converting to numeric before analysis -- confirm.
summary(economic_freedom)
##    CountryID      Country.Name         WEBNAME             Region         
##  Min.   :  1.00   Length:188         Length:188         Length:188        
##  1st Qu.: 46.75   Class :character   Class :character   Class :character  
##  Median : 93.50   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 93.59                                                           
##  3rd Qu.:140.25                                                           
##  Max.   :186.00                                                           
##  NA's   :4                                                                
##   World.Rank        Region.Rank        X2022.Score        Property.Rights   
##  Length:188         Length:188         Length:188         Length:188        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Judical.Effectiveness Government.Integrity  Tax.Burden       
##  Length:188            Length:188           Length:188        
##  Class :character      Class :character     Class :character  
##  Mode  :character      Mode  :character     Mode  :character  
##                                                               
##                                                               
##                                                               
##                                                               
##  Gov.t.Spending     Fiscal.Health      Business.Freedom   Labor.Freedom     
##  Length:188         Length:188         Length:188         Length:188        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Monetary.Freedom   Trade.Freedom      Investment.Freedom. Financial.Freedom 
##  Length:188         Length:188         Length:188          Length:188        
##  Class :character   Class :character   Class :character    Class :character  
##  Mode  :character   Mode  :character   Mode  :character    Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  Tariff.Rate....         X             Income.Tax.Rate....
##  Length:188         Length:188         Length:188         
##  Class :character   Class :character   Class :character   
##  Mode  :character   Mode  :character   Mode  :character   
##                                                           
##                                                           
##                                                           
##                                                           
##  Corporate.Tax.Rate.... Tax.Burden...of.GDP     X.1           
##  Length:188             Length:188          Length:188        
##  Class :character       Class :character    Class :character  
##  Mode  :character       Mode  :character    Mode  :character  
##                                                               
##                                                               
##                                                               
##                                                               
##  Gov.t.Expenditure...of.GDP.   Country              X.2           
##  Length:188                  Length:188         Length:188        
##  Class :character            Class :character   Class :character  
##  Mode  :character            Mode  :character   Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  Population..Millions. GDP..Billions..PPP. GDP.Growth.Rate....
##  Min.   :   0.038      Length:188          Length:188         
##  1st Qu.:   2.575      Class :character    Class :character   
##  Median :   9.800      Mode  :character    Mode  :character   
##  Mean   :  41.972                                             
##  3rd Qu.:  31.150                                             
##  Max.   :1402.100                                             
##  NA's   :4                                                    
##  X5.Year.GDP.Growth.Rate.... GDP.per.Capita..PPP. Unemployment....  
##  Length:188                  Length:188           Length:188        
##  Class :character            Class :character     Class :character  
##  Mode  :character            Mode  :character     Mode  :character  
##                                                                     
##                                                                     
##                                                                     
##                                                                     
##  Inflation....      FDI.Inflow..Millions. Public.Debt....of.GDP.
##  Length:188         Length:188            Length:188            
##  Class :character   Class :character      Class :character      
##  Mode  :character   Mode  :character      Mode  :character      
##                                                                 
##                                                                 
##                                                                 
## 
# Show the structure (column types and first few values) of the economic
# freedom data.
# NOTE(review): the output below shows "N/A" strings in the score columns and
# "$" / "," formatting in the GDP and FDI columns, which would explain why
# they are typed chr -- confirm against the read.csv() call.
# NOTE(review): X, X.1, X.2, WEBNAME and Country appear to repeat the country
# name (X.1 has a differently spelled first value) -- verify before joining.
str(economic_freedom)
## 'data.frame':    188 obs. of  37 variables:
##  $ CountryID                  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Country.Name               : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ WEBNAME                    : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ Region                     : chr  "Asia-Pacific" "Europe" "Middle East and North Africa" "Sub-Saharan Africa" ...
##  $ World.Rank                 : chr  "N/A" "50" "167" "139" ...
##  $ Region.Rank                : chr  "N/A" "30" "13" "29" ...
##  $ X2022.Score                : chr  "N/A" "66.6" "45.8" "52.6" ...
##  $ Property.Rights            : chr  "N/A" "55.5" "27.9" "39.8" ...
##  $ Judical.Effectiveness      : chr  "N/A" "49.8" "29.7" "25.3" ...
##  $ Government.Integrity       : chr  "N/A" "35.6" "30.1" "20.6" ...
##  $ Tax.Burden                 : chr  "N/A" "89.1" "67.2" "86.6" ...
##  $ Gov.t.Spending             : chr  "N/A" "72.1" "57.1" "86.4" ...
##  $ Fiscal.Health              : chr  "N/A" "70.6" "38.6" "80.0" ...
##  $ Business.Freedom           : chr  "N/A" "70.7" "50.0" "37.6" ...
##  $ Labor.Freedom              : chr  "N/A" "51.1" "51.5" "53.9" ...
##  $ Monetary.Freedom           : chr  "N/A" "82.0" "80.1" "61.2" ...
##  $ Trade.Freedom              : chr  "N/A" "82.6" "57.4" "70.0" ...
##  $ Investment.Freedom.        : chr  "N/A" "70" "30" "30" ...
##  $ Financial.Freedom          : chr  "N/A" "70" "30" "40" ...
##  $ Tariff.Rate....            : chr  "N/A" "3.7" "13.8" "7.5" ...
##  $ X                          : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ Income.Tax.Rate....        : chr  "20.0" "23.0" "35.0" "25.0" ...
##  $ Corporate.Tax.Rate....     : chr  "20.0" "15.0" "26.0" "25.0" ...
##  $ Tax.Burden...of.GDP        : chr  "7.5" "18.3" "37.2" "9.4" ...
##  $ X.1                        : chr  "Afganistan " "Albania" "Algeria" "Angola" ...
##  $ Gov.t.Expenditure...of.GDP.: chr  "N/A" "30.5" "37.8" "21.3" ...
##  $ Country                    : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ X.2                        : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ Population..Millions.      : num  38.9 2.8 43.9 32.9 45.4 3 25.7 8.9 10.1 0.4 ...
##  $ GDP..Billions..PPP.        : chr  "$78.7 " "$40.7 " "$491.5 " "$215.1 " ...
##  $ GDP.Growth.Rate....        : chr  "-5.0" "-3.5" "-6.0" "-4.0" ...
##  $ X5.Year.GDP.Growth.Rate....: chr  "1.0" "2.0" "0.1" "-1.9" ...
##  $ GDP.per.Capita..PPP.       : chr  "$2,390" "$14,218" "$11,112" "$6,932" ...
##  $ Unemployment....           : chr  "11.7" "11.7" "12.8" "7.7" ...
##  $ Inflation....              : chr  "5.6" "1.6" "2.4" "22.3" ...
##  $ FDI.Inflow..Millions.      : chr  "13.0" "1,107.0" "1,125.0" "-1,866.0" ...
##  $ Public.Debt....of.GDP.     : chr  "7.8" "76.0" "53.1" "127.1" ...
# Show the structure of the WHO indicators data; the output below reports
# 202 observations of 358 variables, with many indicator columns containing NA.
str(who)
## 'data.frame':    202 obs. of  358 variables:
##  $ Country                                                                                                                                    : chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ CountryID                                                                                                                                  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Continent                                                                                                                                  : int  1 2 3 2 3 4 5 2 6 2 ...
##  $ Adolescent.fertility.rate....                                                                                                              : int  151 27 6 NA 146 NA 62 30 16 14 ...
##  $ Adult.literacy.rate....                                                                                                                    : num  28 98.7 69.9 NA 67.4 NA 97.2 99.4 NA NA ...
##  $ Gross.national.income.per.capita..PPP.international...                                                                                     : int  NA 6000 5940 NA 3890 15130 11670 4950 33940 36040 ...
##  $ Net.primary.school.enrolment.ratio.female....                                                                                              : int  NA 93 94 83 49 NA 98 84 97 98 ...
##  $ Net.primary.school.enrolment.ratio.male....                                                                                                : int  NA 94 96 83 51 NA 99 80 96 97 ...
##  $ Population..in.thousands..total                                                                                                            : int  26088 3172 33351 74 16557 84 39134 3010 20530 8327 ...
##  $ Population.annual.growth.rate....                                                                                                          : num  4 0.6 1.5 1 2.8 1.3 1 -0.3 1.1 0.4 ...
##  $ Population.in.urban.areas....                                                                                                              : int  23 46 64 93 54 37 90 64 88 66 ...
##  $ Population.living.below.the.poverty.line....living.on..lt..US.1.per.day.                                                                   : num  NA 2 NA NA NA NA 6.6 2 NA NA ...
##  $ Population.median.age..years.                                                                                                              : int  16 29 24 NA 17 NA 29 32 37 40 ...
##  $ Population.proportion.over.60....                                                                                                          : int  4 13 7 22 4 11 14 14 18 22 ...
##  $ Population.proportion.under.15....                                                                                                         : int  47 26 29 14 46 28 26 20 19 16 ...
##  $ Registration.coverage.of.births....                                                                                                        : int  6 90 90 90 29 NA 90 90 90 90 ...
##  $ Total.fertility.rate..per.woman.                                                                                                           : num  7.2 2.1 2.4 1.3 6.5 2.2 2.3 1.3 1.8 1.4 ...
##  $ Antenatal.care.coverage...at.least.four.visits....                                                                                         : int  NA NA 41 NA NA NA NA 71 NA NA ...
##  $ Antiretroviral.therapy.coverage.among.HIV.infected.pregt.women.for.PMTCT....                                                               : int  NA NA NA NA 14 NA NA NA NA NA ...
##  $ Antiretroviral.therapy.coverage.among.people.with.advanced.HIV.infections....                                                              : int  NA NA 14 NA 16 NA 71 8 NA NA ...
##  $ Births.attended.by.skilled.health.personnel....                                                                                            : int  14 100 95 NA 45 100 99 98 100 NA ...
##  $ Births.by.caesarean.section....                                                                                                            : int  NA 15 6 NA NA NA NA 9 NA 21 ...
##  $ Children.aged.6.59.months.who.received.vitamin.A.supplementation....                                                                       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Children.aged..lt.5.years.sleeping.under.insecticide.treated.nets....                                                                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Children.aged..lt.5.years.who.received.any.antimalarial.treatment.for.fever....                                                            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Children.aged..lt.5.years.with.ARI.symptoms.taken.to.facility....                                                                          : num  NA NA NA NA NA NA NA 31.9 NA NA ...
##  $ Children.aged..lt.5.years.with.diarrhoea.receiving.ORT....                                                                                 : num  NA NA NA NA NA NA NA 65.3 NA NA ...
##  $ Contraceptive.prevalence....                                                                                                               : num  10.3 75.1 61.4 NA 6.2 NA 65.3 53.1 NA NA ...
##  $ Neonates.protected.at.birth.against.neonatal.tetanus..PAB.....                                                                             : int  73 87 70 NA 81 NA NA NA NA NA ...
##  $ One.year.olds.immunized.with.MCV                                                                                                           : int  70 97 92 94 88 99 99 92 94 79 ...
##  $ One.year.olds.immunized.with.three.doses.of.diphtheria.tetanus.toxoid.and.pertussis..DTP3.....                                             : int  83 98 95 96 83 99 96 88 92 85 ...
##  $ One.year.olds.immunized.with.three.doses.of.Hepatitis.B..HepB3.....                                                                        : int  83 98 90 91 83 97 92 85 94 85 ...
##  $ One.year.olds.immunized.with.three.doses.of.Hib..Hib3..vaccine....                                                                         : int  NA NA NA 95 83 99 96 NA 94 85 ...
##  $ Tuberculosis.detection.rate.under.DOTS....                                                                                                 : int  66 37 102 125 76 284 71 59 40 46 ...
##  $ Tuberculosis.treatment.success.under.DOTS....                                                                                              : int  90 77 87 80 72 100 53 72 80 75 ...
##  $ Women.who.have.had.mammography....                                                                                                         : int  NA NA NA NA NA NA NA NA 57 76 ...
##  $ Women.who.have.had.PAP.smear....                                                                                                           : int  NA NA NA NA NA NA NA NA 61 83 ...
##  $ Community.and.traditional.health.workers.density..per.10.000.population.                                                                   : int  NA NA NA NA NA NA NA NA 2 NA ...
##  $ Dentistry.personnel.density..per.10.000.population.                                                                                        : int  NA 3 3 7 NA 2 8 4 11 5 ...
##  $ Environment.and.public.health.workers.density..per.10.000.population.                                                                      : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ External.resources.for.health.as.percentage.of.total.expenditure.on.health                                                                 : num  20.1 3.7 0.1 0 7 0.2 0.1 14.5 0 0 ...
##  $ General.government.expenditure.on.health.as.percentage.of.total.expenditure.on.health                                                      : num  27.5 35.5 77.3 70.6 86.6 67.3 45.5 41.2 67.2 77 ...
##  $ General.government.expenditure.on.health.as.percentage.of.total.government.expenditure                                                     : num  4.4 11.3 9.5 22.7 5 11.3 14.2 9.7 17.2 15.5 ...
##  $ Hospital.beds..per.10.000.population.                                                                                                      : int  4 30 17 26 1 24 41 44 40 76 ...
##  $ Laboratory.health.workers.density..per.10.000.population.                                                                                  : int  NA NA 3 NA 1 NA NA NA 4 NA ...
##  $ Number.of.community.and.traditional.health.workers                                                                                         : int  NA NA 1062 NA NA NA NA NA 3812 NA ...
##  $ Number.of.dentistry.personnel                                                                                                              : int  900 1035 9553 46 222 13 28900 1255 21296 4467 ...
##  $ Number.of.environment.and.public.health.workers                                                                                            : int  NA NA 2534 NA NA NA NA NA NA NA ...
##  $ Number.of.laboratory.health.workers                                                                                                        : int  NA NA 8838 NA 2029 NA NA NA 8326 NA ...
##  $ Number.of.nursing.and.midwifery.personnel                                                                                                  : int  14930 14637 69749 259 18977 233 29000 14806 187837 53782 ...
##  $ Number.of.other.health.service.providers                                                                                                   : int  NA NA 6716 NA 254 NA NA NA 42151 NA ...
##  $ Number.of.pharmaceutical.personnel                                                                                                         : int  900 1173 6333 72 919 NA 15300 157 13956 5076 ...
##  $ Number.of.physicians                                                                                                                       : int  5970 3626 35368 244 1165 12 108800 11133 47875 30068 ...
##  $ Nursing.and.midwifery.personnel.density..per.10.000.population.                                                                            : int  5 47 22 39 14 33 8 49 97 66 ...
##  $ Other.health.service.providers.density..per.10.000.population.                                                                             : int  NA NA 2 NA NA NA NA NA 22 NA ...
##  $ Out.of.pocket.expenditure.as.percentage.of.private.expenditure.on.health                                                                   : num  97.2 94.7 94.6 73.2 100 86.9 43.8 87.6 55.7 72.2 ...
##  $ Per.capita.government.expenditure.on.health..PPP.int....                                                                                   : int  8 127 146 2054 61 439 758 112 2097 2729 ...
##  $ Per.capita.government.expenditure.on.health.at.average.exchange.rate..US..                                                                 : int  6 62 95 1987 62 348 251 41 2227 2975 ...
##  $ Per.capita.total.expenditure.on.health..PPP.int....                                                                                        : int  29 358 188 2910 71 652 1665 272 3122 3545 ...
##  $ Per.capita.total.expenditure.on.health.at.average.exchange.rate..US..                                                                      : int  23 174 123 2815 71 517 551 99 3316 3864 ...
##  $ Pharmaceutical.personnel.density..per.10.000.population.                                                                                   : int  NA 4 2 11 NA NA 4 NA 7 6 ...
##  $ Physicians.density..per.10.000.population.                                                                                                 : num  2 12 11 36 NA 2 30 37 25 37 ...
##  $ Private.expenditure.on.health.as.percentage.of.total.expenditure.on.health                                                                 : num  72.5 64.5 22.7 29.4 13.4 32.7 54.5 58.8 32.8 23 ...
##  $ Private.prepaid.plans.as.percentage.of.private.expenditure.on.health                                                                       : num  0 0 5.2 24.6 0 13.1 51.1 0.1 22 23 ...
##  $ Ratio.of.health.management.and.support.workers.to.health.service.providers                                                                 : num  NA NA 0.4 NA 0.01 NA NA NA 1.5 NA ...
##  $ Ratio.of.nurses.and.midwives.to.physicians                                                                                                 : num  2.5 4 2 1.1 16.9 19.3 0.3 1.3 3.9 1.8 ...
##  $ Social.security.expenditure.on.health.as.percentage.of.general.government.expenditure.on.health                                            : num  0 32.8 33.3 87.7 0 0 58.5 0 0 61 ...
##  $ Total.expenditure.on.health.as.percentage.of.gross.domestic.product                                                                        : num  5.4 6.2 3.6 6.3 2.7 4.9 10.1 4.7 8.7 9.9 ...
##  $ Births.attended.by.skilled.health.personnel.....highest.educational.level.of.mother                                                        : num  NA NA NA NA NA NA NA 97.6 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.....highest.wealth.quintile                                                                    : num  NA NA NA NA NA NA NA 100 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.....lowest.educational.level.of.mother                                                         : num  NA NA NA NA NA NA NA 97.4 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.....lowest.wealth.quintile                                                                     : num  NA NA NA NA NA NA NA 92.8 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.....rural                                                                                      : num  NA NA NA NA NA NA NA 98 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.....urban                                                                                      : num  NA NA NA NA NA NA NA 98.6 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.difference.highest.lowest.educational.level.of.mother                                          : num  NA NA NA NA NA NA NA 0.2 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.difference.highest.lowest.wealth.quintile                                                      : num  NA NA NA NA NA NA NA 7.2 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.difference.urban.rural                                                                         : num  NA NA NA NA NA NA NA 0.6 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.ratio.highest.lowest.educational.level.of.mother                                               : num  NA NA NA NA NA NA NA 1 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.ratio.highest.lowest.wealth.quintile                                                           : num  NA NA NA NA NA NA NA 1.1 NA NA ...
##  $ Births.attended.by.skilled.health.personnel.ratio.urban.rural                                                                              : num  NA NA NA NA NA NA NA 1 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.....highest.educational.level.of.mother                                                  : num  NA NA NA NA NA NA NA 79.4 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.....highest.wealth.quintile                                                              : num  NA NA NA NA NA NA NA 60.7 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.....lowest.educational.level.of.mother                                                   : num  NA NA NA NA NA NA NA 70.8 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.....lowest.wealth.quintile                                                               : num  NA NA NA NA NA NA NA 71.6 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.....rural                                                                                : num  NA NA NA NA NA NA NA 80.4 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.....urban                                                                                : num  NA NA NA NA NA NA NA 67 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.difference.highest.lowest.educational.level.of.mother                                    : num  NA NA NA NA NA NA NA 8.6 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.difference.highest.lowest.wealth.quintile                                                : num  NA NA NA NA NA NA NA -10.9 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.difference.urban.rural                                                                   : num  NA NA NA NA NA NA NA -13.4 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.ratio.highest.lowest.educational.level.of.mother                                         : num  NA NA NA NA NA NA NA 1.1 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.ratio.highest.lowest.wealth.quintile                                                     : num  NA NA NA NA NA NA NA 0.8 NA NA ...
##  $ Measles.immunization.coverage.among.one.year.olds.ratio.urban.rural                                                                        : num  NA NA NA NA NA NA NA 0.8 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..difference.lowest.highest.educational.level.of.mother: num  NA NA NA NA NA NA NA 6 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..difference.lowest.highest.wealth.quintile            : num  NA NA NA NA NA NA NA 29 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..difference.rural.urban                               : num  NA NA NA NA NA NA NA 16 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..highest.educational.level.of.mother                  : num  NA NA NA NA NA NA NA 27 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..highest.wealth.quintile                              : num  NA NA NA NA NA NA NA 23 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..lowest.educational.level.of.mother                   : num  NA NA NA NA NA NA NA 33 NA NA ...
##  $ Under.5.mortality.rate..Probability.of.dying.aged..lt..5.years.per.1.000.live.births..lowest.wealth.quintile                               : num  NA NA NA NA NA NA NA 52 NA NA ...
##   [list output truncated]

The economic freedom dataset has some interesting indicators apart from the classical ones such as GDP or GNI. In general, we can use some “social” and health variables of the WHO dataset to measure the quality of life and social freedom, and the other dataset to measure the economic freedom of each country. Therefore, if we merge both datasets we can see which are the freest countries in all aspects (and whether they are also the ones with a better quality of life).

Merge the datasets

To merge the datasets, we need a common variable. In our case it is the name of the country. But since the datasets come from different sources, we need to check whether the country names are written in the same way in both.

# Are the countries written in the same way?
# Compare every country name of the economic dataset (column 27) with the
# country names of the WHO dataset (column 1) and report each row that has
# no exact match. seq_len() keeps the loop empty for a dataset without rows,
# and %in% never returns NA, so a missing name cannot break the `if`.
for (j in seq_len(nrow(economic_freedom))) {
  if (!(economic_freedom[j, 27] %in% who[, 1])) {
    cat("The row", j, "is written different or does not appear in the WHO dataset:", economic_freedom[j, 27], "\n")
  }
}
## The row 26 is written different or does not appear in the WHO dataset: Burma 
## The row 31 is written different or does not appear in the WHO dataset: Cabo Verde 
## The row 38 is written different or does not appear in the WHO dataset: Congo, Democratic Republic of the Congo 
## The row 39 is written different or does not appear in the WHO dataset: Congo, Republic of 
## The row 41 is written different or does not appear in the WHO dataset: Côte d'Ivoire 
## The row 56 is written different or does not appear in the WHO dataset: Eswatini 
## The row 77 is written different or does not appear in the WHO dataset: Iran 
## The row 88 is written different or does not appear in the WHO dataset: Korea, North  
## The row 89 is written different or does not appear in the WHO dataset: Korea, South 
## The row 90 is written different or does not appear in the WHO dataset: Kosovo 
## The row 92 is written different or does not appear in the WHO dataset: Kyrgyz Republic 
## The row 93 is written different or does not appear in the WHO dataset: Lao P.D.R. 
## The row 98 is written different or does not appear in the WHO dataset: Libya 
## The row 99 is written different or does not appear in the WHO dataset: Liechtenstein 
## The row 112 is written different or does not appear in the WHO dataset: Micronesia 
## The row 142 is written different or does not appear in the WHO dataset: São Tomé and Príncipe 
## The row 149 is written different or does not appear in the WHO dataset: Slovak Republic 
## The row 161 is written different or does not appear in the WHO dataset: Taiwan  
## The row 176 is written different or does not appear in the WHO dataset: United States 
## The row 185 is written different or does not appear in the WHO dataset:  
## The row 186 is written different or does not appear in the WHO dataset:  
## The row 187 is written different or does not appear in the WHO dataset:  
## The row 188 is written different or does not appear in the WHO dataset:
# About 20 countries appear in both datasets but under different names.
# For example, we have "United States" in the economic_freedom dataset and
# "United States of America" in the who dataset.
# Rename them so that they are spelled the same way in both datasets.
who$Country[which(who$Country == "Cape Verde")] <- "Cabo Verde"
economic_freedom$Country[
  which(economic_freedom$Country == "Congo, Democratic Republic of the Congo")
] <- "Congo, Dem. Rep."
economic_freedom$Country[
  which(economic_freedom$Country == "Congo, Republic of")
] <- "Congo, Rep."
# Comparing against the accented literal "Côte d'Ivoire" with `==` does not
# match (the re-check after the renaming still reports the accented name),
# presumably because the CSV encoding differs from the script encoding.
# Match byte-wise on an ASCII-only fragment instead.
# NOTE(review): confirm that the who dataset spells it "Cote d'Ivoire".
economic_freedom$Country[
  grepl("Ivoire", economic_freedom$Country, fixed = TRUE, useBytes = TRUE)
] <- "Cote d'Ivoire"
who$Country[which(who$Country == "Iran (Islamic Republic of)")] <- "Iran"
# The trailing space in "Korea, North " is kept on purpose: it reproduces the
# spelling used by the economic_freedom dataset.
who$Country[which(who$Country == "Korea, Dem. Rep.")] <- "Korea, North "
who$Country[which(who$Country == "Korea, Rep.")] <- "Korea, South"
economic_freedom$Country[
  which(economic_freedom$Country == "Kyrgyz Republic")
] <- "Kyrgyzstan"
who$Country[
  which(who$Country == "Lao People's Democratic Republic")
] <- "Lao P.D.R."
who$Country[which(who$Country == "Libyan Arab Jamahiriya")] <- "Libya"
economic_freedom$Country[
  which(economic_freedom$Country == "North Macedonia")
] <- "Macedonia"
who$Country[
  which(who$Country == "Micronesia (Federated States of)")
] <- "Micronesia"
# Same problem as for Côte d'Ivoire: the accented "São Tomé and Príncipe" is
# not matched by `==`, so both ASCII-only fragments are searched byte-wise.
# NOTE(review): confirm that the who dataset spells it
# "Sao Tome and Principe".
economic_freedom$Country[
  grepl("Tom", economic_freedom$Country, fixed = TRUE, useBytes = TRUE) &
    grepl("ncipe", economic_freedom$Country, fixed = TRUE, useBytes = TRUE)
] <- "Sao Tome and Principe"
economic_freedom$Country[
  which(economic_freedom$Country == "Slovak Republic")
] <- "Slovakia"
# The economic_freedom dataset spells Taiwan with a trailing space.
economic_freedom$Country[which(economic_freedom$Country == "Taiwan ")] <-
  "Taiwan"
who$Country[which(who$Country == "United States of America")] <-
  "United States"

economic_freedom$Country[which(economic_freedom$Country == "Burma")] <-
  "Myanmar"
who$Country[which(who$Country == "Swaziland")] <- "Eswatini"


# Check the country names again after the renaming. Every row still listed
# below has no exact match in the WHO dataset (Kosovo and Liechtenstein, for
# instance, do not appear there at all).
# seq_len() keeps the loop empty for a dataset without rows, and %in% never
# returns NA, so a missing name cannot break the `if`.
for (j in seq_len(nrow(economic_freedom))) {
  if (!(economic_freedom[j, 27] %in% who[, 1])) {
    cat("The row", j, "is not in both dataset:", economic_freedom[j, 27], "\n")
  }
}
## The row 41 is not in both dataset: Côte d'Ivoire 
## The row 90 is not in both dataset: Kosovo 
## The row 99 is not in both dataset: Liechtenstein 
## The row 142 is not in both dataset: São Tomé and Príncipe 
## The row 185 is not in both dataset:  
## The row 186 is not in both dataset:  
## The row 187 is not in both dataset:  
## The row 188 is not in both dataset:
# There is also something strange with rows 185 to 188 of the economic
# dataset: they do not seem to have any value in the variable Country
# (the name of the country). Let's have a look by reporting every row whose
# first column is NA.
# seq_len() is used instead of 1:nrow() so that an empty dataset produces no
# iterations at all.
for (i in seq_len(nrow(economic_freedom))) {
  if (is.na(economic_freedom[i, 1])) {
    cat("Row ", i, " has NA values\n")
  }
}
## Row  185  has NA values
## Row  186  has NA values
## Row  187  has NA values
## Row  188  has NA values
# Display the missing-data pattern of the variables of the economic dataset.
md.pattern(economic_freedom)

##     Country.Name WEBNAME Region World.Rank Region.Rank X2022.Score
## 183            1       1      1          1           1           1
## 4              1       1      1          1           1           1
## 1              1       1      1          1           1           1
##                0       0      0          0           0           0
##     Property.Rights Judical.Effectiveness Government.Integrity Tax.Burden
## 183               1                     1                    1          1
## 4                 1                     1                    1          1
## 1                 1                     1                    1          1
##                   0                     0                    0          0
##     Gov.t.Spending Fiscal.Health Business.Freedom Labor.Freedom
## 183              1             1                1             1
## 4                1             1                1             1
## 1                1             1                1             1
##                  0             0                0             0
##     Monetary.Freedom Trade.Freedom Investment.Freedom. Financial.Freedom
## 183                1             1                   1                 1
## 4                  1             1                   1                 1
## 1                  1             1                   1                 1
##                    0             0                   0                 0
##     Tariff.Rate.... X Income.Tax.Rate.... Corporate.Tax.Rate....
## 183               1 1                   1                      1
## 4                 1 1                   1                      1
## 1                 1 1                   1                      1
##                   0 0                   0                      0
##     Tax.Burden...of.GDP X.1 Gov.t.Expenditure...of.GDP. Country X.2
## 183                   1   1                           1       1   1
## 4                     1   1                           1       1   1
## 1                     1   1                           1       1   1
##                       0   0                           0       0   0
##     GDP..Billions..PPP. GDP.Growth.Rate.... X5.Year.GDP.Growth.Rate....
## 183                   1                   1                           1
## 4                     1                   1                           1
## 1                     1                   1                           1
##                       0                   0                           0
##     GDP.per.Capita..PPP. Unemployment.... Inflation.... Public.Debt....of.GDP.
## 183                    1                1             1                      1
## 4                      1                1             1                      1
## 1                      1                1             1                      1
##                        0                0             0                      0
##     FDI.Inflow..Millions. CountryID Population..Millions.  
## 183                     1         1                     1 0
## 4                       1         0                     0 2
## 1                       0         1                     1 1
##                         1         4                     4 9
# The checks above show that the last 4 rows of the economic dataset are
# empty. We could remove them explicitly, but since we are going to merge
# the datasets now, we are going to get rid of them anyway.

# Finally, merge both datasets by the name of the country.
# Some countries of the who dataset are lost here, because not all of them
# are also in the economic_freedom dataset, which is normal.

total.data <- merge(who, economic_freedom, by = "Country")

Let’s select some relevant variables. Later we will see which are the most important ones.

# Keep only the country name plus the health, social and economic
# indicators that are used in the rest of the analysis.
data <- select(
  total.data,
  Country,
  Total.fertility.rate..per.woman.,
  Gross.national.income.per.capita..PPP.international...,
  Population.living.below.the.poverty.line....living.on..lt..US.1.per.day.,
  Adult.mortality.rate..probability.of.dying.between.15.to.60.years.per.1000.population..both.sexes,
  Infant.mortality.rate..per.1.000.live.births..both.sexes,
  Life.expectancy.at.birth..years..both.sexes,
  Under.5.mortality.rate..probability.of.dying.by.age.5.per.1000.live.births..both.sexes,
  CO2_emissions,
  Cell_phones_per_100_people,
  Income_per_person,
  Inflation....,
  Business.Freedom,
  Internet_users,
  Democracy_score,
  Judical.Effectiveness,
  X2022.Score,
  Property.Rights,
  Unemployment....,
  GDP.per.Capita..PPP.
)

New variable

I also think that it would be interesting to have a variable that is representative of the ‘equality’ in the country. Since there is no variable that measures exactly that, I think a good approximation is to check whether the percentages of women and men enrolled in primary school are similar. Countries that are more advanced in equality tend to have a similar primary school enrollment ratio for both sexes. Hence, I am going to create a new variable called ‘Education_equality’ that holds the ratio of the female to the male enrollment ratio:

# Ratio of the female to the male net primary school enrolment ratio.
data$Education_equality <- with(
  total.data,
  Net.primary.school.enrolment.ratio.female.... /
    Net.primary.school.enrolment.ratio.male....
)

Finding NA’s

# Find the NAs
# How many NAs do we have in the whole dataset?
sum(is.na(data))
## [1] 182
# The function summary() shows in more detail which variables contain
# the NAs.
summary(data)
##    Country          Total.fertility.rate..per.woman.
##  Length:180         Min.   :1.200                   
##  Class :character   1st Qu.:1.800                   
##  Mode  :character   Median :2.500                   
##                     Mean   :3.026                   
##                     3rd Qu.:3.950                   
##                     Max.   :7.300                   
##                     NA's   :1                       
##  Gross.national.income.per.capita..PPP.international...
##  Min.   :  260                                         
##  1st Qu.: 2135                                         
##  Median : 6110                                         
##  Mean   :11349                                         
##  3rd Qu.:14695                                         
##  Max.   :60870                                         
##  NA's   :9                                             
##  Population.living.below.the.poverty.line....living.on..lt..US.1.per.day.
##  Min.   : 2.00                                                           
##  1st Qu.: 2.00                                                           
##  Median : 7.40                                                           
##  Mean   :16.04                                                           
##  3rd Qu.:23.10                                                           
##  Max.   :70.80                                                           
##  NA's   :109                                                             
##  Adult.mortality.rate..probability.of.dying.between.15.to.60.years.per.1000.population..both.sexes
##  Min.   : 58                                                                                      
##  1st Qu.:120                                                                                      
##  Median :186                                                                                      
##  Mean   :224                                                                                      
##  3rd Qu.:284                                                                                      
##  Max.   :751                                                                                      
##  NA's   :1                                                                                        
##  Infant.mortality.rate..per.1.000.live.births..both.sexes
##  Min.   :  2.00                                          
##  1st Qu.:  9.00                                          
##  Median : 23.00                                          
##  Mean   : 38.93                                          
##  3rd Qu.: 60.00                                          
##  Max.   :165.00                                          
##  NA's   :1                                               
##  Life.expectancy.at.birth..years..both.sexes
##  Min.   :40.00                              
##  1st Qu.:61.00                              
##  Median :70.00                              
##  Mean   :67.09                              
##  3rd Qu.:75.00                              
##  Max.   :83.00                              
##  NA's   :1                                  
##  Under.5.mortality.rate..probability.of.dying.by.age.5.per.1000.live.births..both.sexes
##  Min.   :  3.0                                                                         
##  1st Qu.: 10.0                                                                         
##  Median : 27.0                                                                         
##  Mean   : 56.4                                                                         
##  3rd Qu.: 81.0                                                                         
##  Max.   :269.0                                                                         
##  NA's   :1                                                                             
##  CO2_emissions     Cell_phones_per_100_people Income_per_person
##  Min.   : 0.0100   Min.   :  0.300            Min.   :  264    
##  1st Qu.: 0.6275   1st Qu.:  8.575            1st Qu.: 1946    
##  Median : 2.1850   Median : 35.650            Median : 5493    
##  Mean   : 5.0928   Mean   : 43.282            Mean   :11179    
##  3rd Qu.: 7.0925   3rd Qu.: 75.275            3rd Qu.:14490    
##  Max.   :57.7200   Max.   :154.800            Max.   :70014    
##  NA's   :6         NA's   :6                  NA's   :4        
##  Inflation....      Business.Freedom   Internet_users  Democracy_score 
##  Length:180         Length:180         Min.   : 0.00   Min.   :-10.00  
##  Class :character   Class :character   1st Qu.: 2.60   1st Qu.: -3.00  
##  Mode  :character   Mode  :character   Median : 8.20   Median :  6.00  
##                                        Mean   :17.32   Mean   :  3.34  
##                                        3rd Qu.:26.07   3rd Qu.:  9.00  
##                                        Max.   :76.20   Max.   : 10.00  
##                                        NA's   :6       NA's   :27      
##  Judical.Effectiveness X2022.Score        Property.Rights    Unemployment....  
##  Length:180            Length:180         Length:180         Length:180        
##  Class :character      Class :character   Class :character   Class :character  
##  Mode  :character      Mode  :character   Mode  :character   Mode  :character  
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##  GDP.per.Capita..PPP. Education_equality
##  Length:180           Min.   :0.5455    
##  Class :character     1st Qu.:0.9673    
##  Mode  :character     Median :1.0000    
##                       Mean   :0.9715    
##                       3rd Qu.:1.0104    
##                       Max.   :1.0989    
##                       NA's   :10
# Some missing values are stored as the text markers "n/a" / "N/A".
# Recode them as real NA values so every missing entry is handled the same way.
is_text_na <- data == "n/a" | data == "N/A"
data[is_text_na] <- NA
# Check that no text marker is left (expected result: integer(0)):
which(data == "n/a" | data == "N/A")
## integer(0)

Casting variables

# These indicators were read as character columns; convert them to numeric.
text_cols <- c("X2022.Score", "Property.Rights", "Judical.Effectiveness",
               "Inflation....", "Unemployment....", "Business.Freedom")
data[text_cols] <- lapply(data[text_cols], as.numeric)

How can we cast the numbers that come with a dollar sign?

# GDP per capita is stored as text such as "$12,345", so it cannot be cast to
# numeric directly. Strip the dollar sign and the commas in one vectorised
# step, then cast.
# The commas are thousands separators (not decimal marks); removing them
# leaves the plain digits. Removing "$" by pattern, rather than always
# dropping the first character, keeps values that have no leading "$" intact.
data$GDP.per.Capita..PPP. <- as.numeric(
  gsub("[$,]", "", data$GDP.per.Capita..PPP.)
)

Let’s change the name of the variables so they are shorter:

# Replace the long original column names with short ones (same column order).
names(data) <- c(
  "Country", "Fertility", "GNI_Capita_PPP", "Poverty",
  "Adult_Mortality", "Infant_Mortality", "Life_Expentancy",
  "Under5_mortality", "CO2", "Cell_phones", "Income_per_person",
  "Inflation", "Business_Freedom", "Internet_users", "Democracy",
  "Judical_Effectiveness", "Economic_freedom", "Property_Rights",
  "Unemployment", "GDP_capita_PPP", "Education_Equality"
)

NA’s and outliers

# Total number of missing cells in the data set:
sum(is.na(data))
## [1] 215
length(which(is.na(data))) # Equivalent alternative
## [1] 215
# Next, look at how the missing values are distributed.
# Number of rows that contain at least one missing value:
sum(!complete.cases(data))
## [1] 110
summary(aggr(data))

## 
##  Missings per variable: 
##               Variable Count
##                Country     0
##              Fertility     1
##         GNI_Capita_PPP     9
##                Poverty   109
##        Adult_Mortality     1
##       Infant_Mortality     1
##        Life_Expentancy     1
##       Under5_mortality     1
##                    CO2     6
##            Cell_phones     6
##      Income_per_person     4
##              Inflation     4
##       Business_Freedom     6
##         Internet_users     6
##              Democracy    27
##  Judical_Effectiveness     6
##       Economic_freedom     6
##        Property_Rights     6
##           Unemployment     3
##         GDP_capita_PPP     2
##     Education_Equality    10
## 
##  Missings in combinations of variables: 
##                               Combinations Count    Percent
##  0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0    70 38.8888889
##  0:0:0:0:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:0     1  0.5555556
##  0:0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0    67 37.2222222
##  0:0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:1     3  1.6666667
##  0:0:0:1:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:0    17  9.4444444
##  0:0:0:1:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:1     1  0.5555556
##  0:0:0:1:0:0:0:0:0:0:0:0:0:0:1:0:0:0:1:0:0     3  1.6666667
##  0:0:0:1:0:0:0:0:0:0:0:0:1:0:0:1:1:1:0:0:0     2  1.1111111
##  0:0:0:1:0:0:0:0:0:0:0:1:1:0:0:1:1:1:0:1:0     1  0.5555556
##  0:0:0:1:0:0:0:0:1:0:0:0:0:0:0:0:0:0:0:0:0     2  1.1111111
##  0:0:0:1:0:0:0:0:1:1:1:0:0:1:1:0:0:0:0:0:0     1  0.5555556
##  0:0:0:1:0:0:0:0:1:1:1:0:0:1:1:0:0:0:0:0:1     3  1.6666667
##  0:0:1:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0     2  1.1111111
##  0:0:1:1:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:0     1  0.5555556
##  0:0:1:1:0:0:0:0:0:0:0:0:1:0:0:1:1:1:0:0:0     1  0.5555556
##  0:0:1:1:0:0:0:0:0:0:0:0:1:0:0:1:1:1:0:0:1     1  0.5555556
##  0:0:1:1:0:0:0:0:0:0:0:1:0:0:0:0:0:0:0:0:0     1  0.5555556
##  0:0:1:1:0:0:0:0:0:0:0:1:1:0:0:1:1:1:0:0:0     1  0.5555556
##  0:0:1:1:0:0:0:0:0:1:0:1:0:1:0:0:0:0:0:1:1     1  0.5555556
##  0:1:1:1:1:1:1:1:0:1:0:0:0:1:0:0:0:0:0:0:1     1  0.5555556
# The plot on the right gives us the following information (also shown in detail on the console):
# The blue row (at the bottom) shows the rows that contain no NA at all (39% of our rows).
# The second row from the bottom shows the rows whose only NA is in the variable Poverty (which we remove below); they represent about 37% of our data.
# The top row of the graph shows that there is one row in which many variables have a missing value.


# Poverty is missing in more than half of the rows (109 of 180), so we drop it.
data$Poverty <- NULL

Another way to visualize the NAs

# Draw a map of the missing values in `data`, titled "Missing Values",
# using the two colours "pink" and "snow2".
missmap(data, main = "Missing Values", col = c("pink", "snow2"))

Remove rows with many NAs

# Number of NAs in each column:
colSums(is.na(data))
##               Country             Fertility        GNI_Capita_PPP 
##                     0                     1                     9 
##       Adult_Mortality      Infant_Mortality       Life_Expentancy 
##                     1                     1                     1 
##      Under5_mortality                   CO2           Cell_phones 
##                     1                     6                     6 
##     Income_per_person             Inflation      Business_Freedom 
##                     4                     4                     6 
##        Internet_users             Democracy Judical_Effectiveness 
##                     6                    27                     6 
##      Economic_freedom       Property_Rights          Unemployment 
##                     6                     6                     3 
##        GDP_capita_PPP    Education_Equality 
##                     2                    10
# Number of NAs in each row:
rowSums(is.na(data))
##   [1] 6 0 0 0 0 0 0 1 0 2 0 0 1 0 0 1 0 0 0 2 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0
##  [38] 1 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
##  [75] 0 5 0 0 0 0 0 0 0 0 2 6 0 0 0 0 0 0 1 0 4 0 1 1 0 0 0 1 0 1 0 0 0 6 0 0 6
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 6 2 0 0 0 1 0 6
## [149] 0 0 0 0 1 0 1 6 9 0 0 0 5 0 1 0 0 0 1 1 0 0 0 0 0 0 1 0 0 4 0 1
# Remove every row with more than 6 missing values.
# A logical mask is used instead of `-which(vec > 6)`: when no row exceeds the
# threshold, `which()` returns integer(0) and the negative index would then
# select zero rows, i.e. silently drop the whole data set.
vec <- rowSums(is.na(data))
data <- data[vec <= 6, ]


# Summarise the missing-value pattern again, now that the Poverty column and
# the rows with more than 6 NAs are gone:
summary(aggr(data))

## 
##  Missings per variable: 
##               Variable Count
##                Country     0
##              Fertility     0
##         GNI_Capita_PPP     8
##        Adult_Mortality     0
##       Infant_Mortality     0
##        Life_Expentancy     0
##       Under5_mortality     0
##                    CO2     6
##            Cell_phones     5
##      Income_per_person     4
##              Inflation     4
##       Business_Freedom     6
##         Internet_users     5
##              Democracy    27
##  Judical_Effectiveness     6
##       Economic_freedom     6
##        Property_Rights     6
##           Unemployment     3
##         GDP_capita_PPP     2
##     Education_Equality     9
## 
##  Missings in combinations of variables: 
##                             Combinations Count    Percent
##  0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0   137 76.5363128
##  0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:1     3  1.6759777
##  0:0:0:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:0    18 10.0558659
##  0:0:0:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:1     1  0.5586592
##  0:0:0:0:0:0:0:0:0:0:0:0:0:1:0:0:0:1:0:0     3  1.6759777
##  0:0:0:0:0:0:0:0:0:0:0:1:0:0:1:1:1:0:0:0     2  1.1173184
##  0:0:0:0:0:0:0:0:0:0:1:1:0:0:1:1:1:0:1:0     1  0.5586592
##  0:0:0:0:0:0:0:1:0:0:0:0:0:0:0:0:0:0:0:0     2  1.1173184
##  0:0:0:0:0:0:0:1:1:1:0:0:1:1:0:0:0:0:0:0     1  0.5586592
##  0:0:0:0:0:0:0:1:1:1:0:0:1:1:0:0:0:0:0:1     3  1.6759777
##  0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0     2  1.1173184
##  0:0:1:0:0:0:0:0:0:0:0:0:0:1:0:0:0:0:0:0     1  0.5586592
##  0:0:1:0:0:0:0:0:0:0:0:1:0:0:1:1:1:0:0:0     1  0.5586592
##  0:0:1:0:0:0:0:0:0:0:0:1:0:0:1:1:1:0:0:1     1  0.5586592
##  0:0:1:0:0:0:0:0:0:0:1:0:0:0:0:0:0:0:0:0     1  0.5586592
##  0:0:1:0:0:0:0:0:0:0:1:1:0:0:1:1:1:0:0:0     1  0.5586592
##  0:0:1:0:0:0:0:0:1:0:1:0:1:0:0:0:0:0:1:1     1  0.5586592

Replacement of missing values

# Missing values are replaced using multiple imputation.
# Fix the random seed first so the imputation below can be reproduced.
set.seed(123)
# Show the missing-data pattern of the variables:
md.pattern(data)

##     Country Fertility Adult_Mortality Infant_Mortality Life_Expentancy
## 137       1         1               1                1               1
## 18        1         1               1                1               1
## 3         1         1               1                1               1
## 1         1         1               1                1               1
## 2         1         1               1                1               1
## 1         1         1               1                1               1
## 2         1         1               1                1               1
## 1         1         1               1                1               1
## 1         1         1               1                1               1
## 2         1         1               1                1               1
## 1         1         1               1                1               1
## 1         1         1               1                1               1
## 1         1         1               1                1               1
## 3         1         1               1                1               1
## 3         1         1               1                1               1
## 1         1         1               1                1               1
## 1         1         1               1                1               1
##           0         0               0                0               0
##     Under5_mortality GDP_capita_PPP Unemployment Income_per_person Inflation
## 137                1              1            1                 1         1
## 18                 1              1            1                 1         1
## 3                  1              1            1                 1         1
## 1                  1              1            1                 1         1
## 2                  1              1            1                 1         1
## 1                  1              1            1                 1         1
## 2                  1              1            1                 1         1
## 1                  1              1            1                 1         1
## 1                  1              1            1                 1         1
## 2                  1              1            1                 1         1
## 1                  1              1            1                 1         0
## 1                  1              1            1                 1         0
## 1                  1              1            1                 0         1
## 3                  1              1            1                 0         1
## 3                  1              1            0                 1         1
## 1                  1              0            1                 1         0
## 1                  1              0            1                 1         0
##                    0              2            3                 4         4
##     Cell_phones Internet_users CO2 Business_Freedom Judical_Effectiveness
## 137           1              1   1                1                     1
## 18            1              1   1                1                     1
## 3             1              1   1                1                     1
## 1             1              1   1                1                     1
## 2             1              1   1                1                     1
## 1             1              1   1                1                     1
## 2             1              1   1                0                     0
## 1             1              1   1                0                     0
## 1             1              1   1                0                     0
## 2             1              1   0                1                     1
## 1             1              1   1                1                     1
## 1             1              1   1                0                     0
## 1             0              0   0                1                     1
## 3             0              0   0                1                     1
## 3             1              1   1                1                     1
## 1             1              1   1                0                     0
## 1             0              0   1                1                     1
##               5              5   6                6                     6
##     Economic_freedom Property_Rights GNI_Capita_PPP Education_Equality
## 137                1               1              1                  1
## 18                 1               1              1                  1
## 3                  1               1              1                  0
## 1                  1               1              1                  0
## 2                  1               1              0                  1
## 1                  1               1              0                  1
## 2                  0               0              1                  1
## 1                  0               0              0                  1
## 1                  0               0              0                  0
## 2                  1               1              1                  1
## 1                  1               1              0                  1
## 1                  0               0              0                  1
## 1                  1               1              1                  1
## 3                  1               1              1                  0
## 3                  1               1              1                  1
## 1                  0               0              1                  1
## 1                  1               1              0                  0
##                    6               6              8                  9
##     Democracy   
## 137         1  0
## 18          0  1
## 3           1  1
## 1           0  2
## 2           1  1
## 1           0  2
## 2           1  4
## 1           1  5
## 1           1  6
## 2           1  1
## 1           1  2
## 1           1  6
## 1           0  5
## 3           0  6
## 3           0  2
## 1           1  6
## 1           1  6
##            27 97
# Indices of the variables (columns) that contain at least one missing value.
# Computed in one vectorised step instead of growing a vector inside a
# `1:ncol(data)` loop (which also misbehaves on a zero-column data frame).
vec1 <- unname(which(colSums(is.na(data)) > 0))

# Impute only those variables, using random-forest imputation.
imp <- mice(data[, vec1], method = "rf")
## 
##  iter imp variable
##   1   1  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   2  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   3  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   4  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   5  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   1  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   2  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   3  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   4  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   5  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   1  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   2  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   3  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   4  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   5  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   1  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   2  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   3  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   4  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   5  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   1  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   2  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   3  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   4  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   5  GNI_Capita_PPP  CO2  Cell_phones  Income_per_person  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
## Warning: Number of logged events: 18

Logged events

# A warning is shown: mice reports 18 logged events for this run.
# Inspect the first and last entries, then the whole log:
head(imp$loggedEvents, 3)
tail(imp$loggedEvents, 3)
imp$loggedEvents  
# What we see is that it is computing the missing values of GNI_Capita
# with the variable Income, and vice versa.

# What we can assume by analyzing this is that these two variables are
# highly correlated. Let's check it:
pairs(data$GNI_Capita_PPP ~ data$Income_per_person)

# They are highly correlated.
# After some research: GNI_Capita_PPP is the gross national income per capita
# adjusted for purchasing power parity, while Income_per_person is the same
# income without that adjustment. The PPP-adjusted figure is better suited to
# comparisons between countries, so we drop Income_per_person.
# The column is removed by name rather than by position (`data[, -10]`), so the
# step cannot delete a different variable if the column order changes or if
# this chunk is run twice.
data$Income_per_person <- NULL

# Now run the multiple imputation again on the variables that still have NAs.
# The column indices are computed in one vectorised step rather than by
# growing a vector inside a `1:ncol(data)` loop.
vec1 <- unname(which(colSums(is.na(data)) > 0))

imp <- mice(data[, vec1], method = "rf")
## 
##  iter imp variable
##   1   1  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   2  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   3  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   4  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   1   5  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   1  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   2  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   3  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   4  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   2   5  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   1  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   2  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   3  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   4  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   3   5  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   1  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   2  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   3  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   4  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   4   5  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   1  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   2  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   3  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   4  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
##   5   5  GNI_Capita_PPP  CO2  Cell_phones  Inflation  Business_Freedom  Internet_users  Democracy  Judical_Effectiveness  Economic_freedom  Property_Rights  Unemployment  GDP_capita_PPP  Education_Equality
# Extract a completed data set from the imputation object
# (mice::complete() returns the first imputation by default).
data_imp <- mice::complete(imp)

# Copy every imputed variable back into `data`, matching columns by name.
# Using names(data_imp) keeps this step in sync with whatever set of variables
# was imputed, instead of repeating a hand-written list of assignments.
data[names(data_imp)] <- data_imp

# Now, we don't have any missing value:
anyNA(data)
## [1] FALSE

Let’s add this categorical variable which later may be interesting for the graphs

# Attach the region of each country as a factor, keeping only the entries of
# the rows that were retained earlier (at most 6 missing values).
# The logical mask `vec <= 6` replaces `-which(vec > 6)`, which would select
# nothing at all if no row had exceeded the threshold.
# NOTE(review): assumes total.data has the same rows, in the same order, as
# `data` had when `vec` was computed -- confirm.
data$Region <- as.factor(total.data$Region[vec <= 6])

Detection of outliers

With graphs:

# Economic freedom is the variable of main interest, so check whether it has
# many outliers and whether noise needs to be reduced.
ggplot(data, aes(x = Unemployment, y = Economic_freedom, color = Region)) +
  geom_point()

ggplot(data, aes(x = Region, y = Economic_freedom, fill = Region)) +
  geom_boxplot()

ggplot(data, aes(x = Economic_freedom)) +
  geom_histogram(bins = 30L, fill = "red") +
  theme_minimal()

# With these 3 graphs we clearly see that we are going to have at least one outlier (the value far away from the rest in the histogram)

The easiest way to identify the outliers with a graph is doing a boxplot

boxplot(data$Economic_freedom, ylab = "Economic Freedom")

# We have 3 outliers, and one of them is especially extreme: North Korea.
lowest_score <- min(data$Economic_freedom)
lowest_score
## [1] 3
data$Country[which(data$Economic_freedom == lowest_score)]
## [1] "Korea, North "

Other ways to identify the outliers:

# 1) With the function outlier() (presumably from the outliers package):
# with logical = TRUE the result is used below as an index that picks the
# country flagged as the outlier.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
idx <- outlier(data$Economic_freedom, logical = TRUE)
data$Country[idx]
## [1] "Korea, North "
# 2) 3-sigma rule: flag values more than 3 standard deviations from the mean
mu <- mean(data$Economic_freedom)
sigma <- sd(data$Economic_freedom)

beyond_3_sigma <- data$Economic_freedom < mu - 3 * sigma |
  data$Economic_freedom > mu + 3 * sigma
sum(beyond_3_sigma)
## [1] 1
data$Country[which(beyond_3_sigma)]
## [1] "Korea, North "
# 3) IQR rule: flag values more than 1.5 * IQR outside the quartiles.
quartiles <- quantile(data$Economic_freedom, c(0.25, 0.75), na.rm = TRUE)
QI <- quartiles[1]
QS <- quartiles[2]
# NOTE: this variable has the same name as the function stats::IQR().
IQR <- QS - QI

outside_fences <- data$Economic_freedom < QI - 1.5 * IQR |
  data$Economic_freedom > QS + 1.5 * IQR
sum(outside_fences)
## [1] 4
data$Country[which(outside_fences)]
## [1] "Afghanistan"   "Cuba"          "Korea, North " "Venezuela"

There may also be outliers with respect to other variables, for instance, if we model economic freedom as a function of the GNI per capita:

# Regress economic freedom on GNI per capita (PPP) and plot the residuals
# against the predictor to look for unusual observations.
lm.fit <- lm(Economic_freedom ~ GNI_Capita_PPP, data)
resid <- residuals(lm.fit)
qplot(data$GNI_Capita_PPP, resid)

# NOTE(review): Country appears to take a distinct value in every row, so this
# model gets one coefficient per country and fits the data exactly (the output
# below reports "no residual degrees of freedom"); standard errors and
# p-values are therefore NaN. Consider dropping Country from the formula.
summary(lm(Economic_freedom ~ Country + GNI_Capita_PPP, data))
## 
## Call:
## lm(formula = Economic_freedom ~ Country + GNI_Capita_PPP, data = data)
## 
## Residuals:
## ALL 179 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (1 not defined because of singularities)
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              2.950e+01        NaN     NaN      NaN
## CountryAlbania                           3.710e+01        NaN     NaN      NaN
## CountryAlgeria                           1.630e+01        NaN     NaN      NaN
## CountryAngola                            2.310e+01        NaN     NaN      NaN
## CountryArgentina                         2.060e+01        NaN     NaN      NaN
## CountryArmenia                           3.580e+01        NaN     NaN      NaN
## CountryAustralia                         4.820e+01        NaN     NaN      NaN
## CountryAustria                           4.430e+01        NaN     NaN      NaN
## CountryAzerbaijan                        3.210e+01        NaN     NaN      NaN
## CountryBahamas                           3.920e+01        NaN     NaN      NaN
## CountryBahrain                           3.250e+01        NaN     NaN      NaN
## CountryBangladesh                        2.320e+01        NaN     NaN      NaN
## CountryBarbados                          4.180e+01        NaN     NaN      NaN
## CountryBelarus                           2.350e+01        NaN     NaN      NaN
## CountryBelgium                           4.010e+01        NaN     NaN      NaN
## CountryBelize                            2.710e+01        NaN     NaN      NaN
## CountryBenin                             3.150e+01        NaN     NaN      NaN
## CountryBhutan                            2.980e+01        NaN     NaN      NaN
## CountryBolivia                           1.350e+01        NaN     NaN      NaN
## CountryBosnia and Herzegovina            3.390e+01        NaN     NaN      NaN
## CountryBotswana                          3.530e+01        NaN     NaN      NaN
## CountryBrazil                            2.380e+01        NaN     NaN      NaN
## CountryBrunei Darussalam                 3.530e+01        NaN     NaN      NaN
## CountryBulgaria                          4.150e+01        NaN     NaN      NaN
## CountryBurkina Faso                      2.880e+01        NaN     NaN      NaN
## CountryBurundi                           9.900e+00        NaN     NaN      NaN
## CountryCabo Verde                        3.720e+01        NaN     NaN      NaN
## CountryCambodia                          2.760e+01        NaN     NaN      NaN
## CountryCameroon                          2.340e+01        NaN     NaN      NaN
## CountryCanada                            4.710e+01        NaN     NaN      NaN
## CountryCentral African Republic          1.620e+01        NaN     NaN      NaN
## CountryChad                              2.030e+01        NaN     NaN      NaN
## CountryChile                             4.490e+01        NaN     NaN      NaN
## CountryChina                             1.850e+01        NaN     NaN      NaN
## CountryColombia                          3.560e+01        NaN     NaN      NaN
## CountryComoros                           2.090e+01        NaN     NaN      NaN
## CountryCongo, Dem. Rep.                  1.810e+01        NaN     NaN      NaN
## CountryCongo, Rep.                       1.900e+01        NaN     NaN      NaN
## CountryCosta Rica                        3.590e+01        NaN     NaN      NaN
## CountryCroatia                           3.810e+01        NaN     NaN      NaN
## CountryCuba                              4.724e-13        NaN     NaN      NaN
## CountryCyprus                            4.340e+01        NaN     NaN      NaN
## CountryCzech Republic                    4.490e+01        NaN     NaN      NaN
## CountryDenmark                           4.850e+01        NaN     NaN      NaN
## CountryDjibouti                          2.580e+01        NaN     NaN      NaN
## CountryDominica                          2.490e+01        NaN     NaN      NaN
## CountryDominican Republic                3.350e+01        NaN     NaN      NaN
## CountryEcuador                           2.480e+01        NaN     NaN      NaN
## CountryEgypt                             1.960e+01        NaN     NaN      NaN
## CountryEl Salvador                       3.010e+01        NaN     NaN      NaN
## CountryEquatorial Guinea                 1.770e+01        NaN     NaN      NaN
## CountryEritrea                           1.020e+01        NaN     NaN      NaN
## CountryEstonia                           5.050e+01        NaN     NaN      NaN
## CountryEswatini                          2.190e+01        NaN     NaN      NaN
## CountryEthiopia                          2.010e+01        NaN     NaN      NaN
## CountryFiji                              2.690e+01        NaN     NaN      NaN
## CountryFinland                           4.880e+01        NaN     NaN      NaN
## CountryFrance                            3.640e+01        NaN     NaN      NaN
## CountryGabon                             2.630e+01        NaN     NaN      NaN
## CountryGambia                            2.850e+01        NaN     NaN      NaN
## CountryGeorgia                           4.230e+01        NaN     NaN      NaN
## CountryGermany                           4.660e+01        NaN     NaN      NaN
## CountryGhana                             3.030e+01        NaN     NaN      NaN
## CountryGreece                            3.200e+01        NaN     NaN      NaN
## CountryGuatemala                         3.370e+01        NaN     NaN      NaN
## CountryGuinea                            2.470e+01        NaN     NaN      NaN
## CountryGuinea-Bissau                     1.650e+01        NaN     NaN      NaN
## CountryGuyana                            3.000e+01        NaN     NaN      NaN
## CountryHaiti                             2.050e+01        NaN     NaN      NaN
## CountryHonduras                          3.000e+01        NaN     NaN      NaN
## CountryHungary                           3.740e+01        NaN     NaN      NaN
## CountryIceland                           4.750e+01        NaN     NaN      NaN
## CountryIndia                             2.440e+01        NaN     NaN      NaN
## CountryIndonesia                         3.490e+01        NaN     NaN      NaN
## CountryIran                              1.290e+01        NaN     NaN      NaN
## CountryIraq                              2.340e+01        NaN     NaN      NaN
## CountryIreland                           5.250e+01        NaN     NaN      NaN
## CountryIsrael                            3.850e+01        NaN     NaN      NaN
## CountryItaly                             3.590e+01        NaN     NaN      NaN
## CountryJamaica                           3.790e+01        NaN     NaN      NaN
## CountryJapan                             4.040e+01        NaN     NaN      NaN
## CountryJordan                            3.060e+01        NaN     NaN      NaN
## CountryKazakhstan                        3.490e+01        NaN     NaN      NaN
## CountryKenya                             2.310e+01        NaN     NaN      NaN
## CountryKiribati                          2.970e+01        NaN     NaN      NaN
## CountryKorea, North                     -2.650e+01        NaN     NaN      NaN
## CountryKorea, South                      4.510e+01        NaN     NaN      NaN
## CountryKuwait                            2.880e+01        NaN     NaN      NaN
## CountryKyrgyzstan                        2.630e+01        NaN     NaN      NaN
## CountryLao P.D.R.                        1.970e+01        NaN     NaN      NaN
## CountryLatvia                            4.530e+01        NaN     NaN      NaN
## CountryLebanon                           1.780e+01        NaN     NaN      NaN
## CountryLesotho                           1.860e+01        NaN     NaN      NaN
## CountryLiberia                           1.840e+01        NaN     NaN      NaN
## CountryLibya                             4.230e+01        NaN     NaN      NaN
## CountryLithuania                         4.630e+01        NaN     NaN      NaN
## CountryLuxembourg                        5.110e+01        NaN     NaN      NaN
## CountryMacedonia                         3.620e+01        NaN     NaN      NaN
## CountryMadagascar                        2.940e+01        NaN     NaN      NaN
## CountryMalawi                            2.350e+01        NaN     NaN      NaN
## CountryMalaysia                          3.860e+01        NaN     NaN      NaN
## CountryMaldives                          1.780e+01        NaN     NaN      NaN
## CountryMali                              2.640e+01        NaN     NaN      NaN
## CountryMalta                             4.200e+01        NaN     NaN      NaN
## CountryMauritania                        2.580e+01        NaN     NaN      NaN
## CountryMauritius                         4.140e+01        NaN     NaN      NaN
## CountryMexico                            3.420e+01        NaN     NaN      NaN
## CountryMicronesia                        3.150e+01        NaN     NaN      NaN
## CountryMoldova                           3.180e+01        NaN     NaN      NaN
## CountryMongolia                          3.440e+01        NaN     NaN      NaN
## CountryMontenegro                        2.830e+01        NaN     NaN      NaN
## CountryMorocco                           2.970e+01        NaN     NaN      NaN
## CountryMozambique                        2.180e+01        NaN     NaN      NaN
## CountryMyanmar                           2.010e+01        NaN     NaN      NaN
## CountryNamibia                           2.970e+01        NaN     NaN      NaN
## CountryNepal                             2.020e+01        NaN     NaN      NaN
## CountryNetherlands                       5.000e+01        NaN     NaN      NaN
## CountryNew Zealand                       5.110e+01        NaN     NaN      NaN
## CountryNicaragua                         2.530e+01        NaN     NaN      NaN
## CountryNiger                             2.540e+01        NaN     NaN      NaN
## CountryNigeria                           2.490e+01        NaN     NaN      NaN
## CountryNorway                            4.740e+01        NaN     NaN      NaN
## CountryOman                              2.710e+01        NaN     NaN      NaN
## CountryPakistan                          1.930e+01        NaN     NaN      NaN
## CountryPanama                            3.590e+01        NaN     NaN      NaN
## CountryPapua New Guinea                  2.510e+01        NaN     NaN      NaN
## CountryParaguay                          3.340e+01        NaN     NaN      NaN
## CountryPeru                              3.700e+01        NaN     NaN      NaN
## CountryPhilippines                       3.160e+01        NaN     NaN      NaN
## CountryPoland                            3.920e+01        NaN     NaN      NaN
## CountryPortugal                          4.130e+01        NaN     NaN      NaN
## CountryQatar                             3.820e+01        NaN     NaN      NaN
## CountryRomania                           3.760e+01        NaN     NaN      NaN
## CountryRussia                            2.660e+01        NaN     NaN      NaN
## CountryRwanda                            2.760e+01        NaN     NaN      NaN
## CountrySaint Lucia                       3.480e+01        NaN     NaN      NaN
## CountrySaint Vincent and the Grenadines  3.620e+01        NaN     NaN      NaN
## CountrySamoa                             3.880e+01        NaN     NaN      NaN
## CountrySaudi Arabia                      2.600e+01        NaN     NaN      NaN
## CountrySenegal                           3.050e+01        NaN     NaN      NaN
## CountrySerbia                            3.570e+01        NaN     NaN      NaN
## CountrySeychelles                        3.160e+01        NaN     NaN      NaN
## CountrySierra Leone                      2.250e+01        NaN     NaN      NaN
## CountrySingapore                         5.490e+01        NaN     NaN      NaN
## CountrySlovakia                          4.020e+01        NaN     NaN      NaN
## CountrySlovenia                          4.100e+01        NaN     NaN      NaN
## CountrySolomon Islands                   2.700e+01        NaN     NaN      NaN
## CountrySomalia                           3.150e+01        NaN     NaN      NaN
## CountrySouth Africa                      2.670e+01        NaN     NaN      NaN
## CountrySpain                             3.870e+01        NaN     NaN      NaN
## CountrySri Lanka                         2.380e+01        NaN     NaN      NaN
## CountrySudan                             2.500e+00        NaN     NaN      NaN
## CountrySuriname                          1.860e+01        NaN     NaN      NaN
## CountrySweden                            4.840e+01        NaN     NaN      NaN
## CountrySwitzerland                       5.470e+01        NaN     NaN      NaN
## CountrySyria                             2.460e+01        NaN     NaN      NaN
## CountryTajikistan                        2.020e+01        NaN     NaN      NaN
## CountryTanzania                          3.000e+01        NaN     NaN      NaN
## CountryThailand                          3.370e+01        NaN     NaN      NaN
## CountryTimor-Leste                       1.680e+01        NaN     NaN      NaN
## CountryTogo                              2.770e+01        NaN     NaN      NaN
## CountryTonga                             3.130e+01        NaN     NaN      NaN
## CountryTrinidad and Tobago               2.930e+01        NaN     NaN      NaN
## CountryTunisia                           2.470e+01        NaN     NaN      NaN
## CountryTurkey                            2.740e+01        NaN     NaN      NaN
## CountryTurkmenistan                      1.670e+01        NaN     NaN      NaN
## CountryUganda                            2.470e+01        NaN     NaN      NaN
## CountryUkraine                           2.460e+01        NaN     NaN      NaN
## CountryUnited Arab Emirates              4.070e+01        NaN     NaN      NaN
## CountryUnited Kingdom                    4.320e+01        NaN     NaN      NaN
## CountryUnited States                     4.260e+01        NaN     NaN      NaN
## CountryUruguay                           4.050e+01        NaN     NaN      NaN
## CountryUzbekistan                        2.620e+01        NaN     NaN      NaN
## CountryVanuatu                           3.340e+01        NaN     NaN      NaN
## CountryVenezuela                        -4.700e+00        NaN     NaN      NaN
## CountryVietnam                           3.110e+01        NaN     NaN      NaN
## CountryYemen                             1.860e+01        NaN     NaN      NaN
## CountryZambia                            1.920e+01        NaN     NaN      NaN
## CountryZimbabwe                          3.600e+00        NaN     NaN      NaN
## GNI_Capita_PPP                                  NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 178 and 0 DF,  p-value: NA
# Box plot of the residuals. The column is named explicitly so that aes()
# reads it from the plot data instead of from a global variable of that name.
ggplot(data.frame(resid = resid), aes(x = resid)) +
  geom_boxplot(fill = "lightblue")

# The majority of the residuals are between (-20, 20), which is fine. However, there are a few values outside that interval (outliers). To reduce noise we can remove those values or discretize the variable.

Other outliers with respect to other variables are:

# Most extreme residual according to outlier(); logical = TRUE returns a mask.
idx <- outlier(resid, logical = TRUE)
data$Country[which(idx)]  # Again, North Korea.
## [1] "Korea, North "
# Residuals more than 20 points away from zero, in either direction.
which(abs(resid) > 20)  # We have 7 countries with outlier residuals.
##   1  41  86  88 152 176 180 
##   1  41  86  88 152 175 179
# We can see that a few countries show up as outliers for any
# combination of economic_freedom with any other variable:
lm.fit1 <- lm(Economic_freedom ~ Unemployment, data = data)
resid1 <- residuals(lm.fit1)
# Residuals against the predictor. ggplot() is used because qplot() is
# deprecated in current ggplot2 releases.
ggplot(
  data.frame(Unemployment = data$Unemployment, resid1 = resid1),
  aes(x = Unemployment, y = resid1)
) +
  geom_point()

# Country is deliberately left out of the formula. With one row per country,
# its dummy variables reproduce every observation exactly, which made the
# previous fit degenerate: all 179 residuals were 0, there were no residual
# degrees of freedom, every standard error was NaN, and the Unemployment
# coefficient could not be estimated (NA).
# Re-knit the document to regenerate the printed summary for this model.
summary(lm(Economic_freedom ~ Unemployment, data = data))
# Box plot of the residuals. The column is named explicitly so that aes()
# reads it from the plot data instead of from a global variable of that name.
ggplot(data.frame(resid1 = resid1), aes(x = resid1)) +
  geom_boxplot(fill = "lightblue")

# Positions of the residuals lying more than 30 points away from zero.
which(abs(resid1) > 30)
##   1  41  86 176 
##   1  41  86 175
# Explain economic freedom by life expectancy and inspect the residuals.
lm.fit2 <- lm(Economic_freedom ~ Life_Expentancy, data = data)
resid2 <- residuals(lm.fit2)
# Residuals against the predictor. ggplot() is used because qplot() is
# deprecated in current ggplot2 releases.
ggplot(
  data.frame(Life_Expentancy = data$Life_Expentancy, resid2 = resid2),
  aes(x = Life_Expentancy, y = resid2)
) +
  geom_point()

summary(lm(Economic_freedom ~ Country + Life_Expentancy, data))
## 
## Call:
## lm(formula = Economic_freedom ~ Country + Life_Expentancy, data = data)
## 
## Residuals:
## ALL 179 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (1 not defined because of singularities)
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              2.950e+01        NaN     NaN      NaN
## CountryAlbania                           3.710e+01        NaN     NaN      NaN
## CountryAlgeria                           1.630e+01        NaN     NaN      NaN
## CountryAngola                            2.310e+01        NaN     NaN      NaN
## CountryArgentina                         2.060e+01        NaN     NaN      NaN
## CountryArmenia                           3.580e+01        NaN     NaN      NaN
## CountryAustralia                         4.820e+01        NaN     NaN      NaN
## CountryAustria                           4.430e+01        NaN     NaN      NaN
## CountryAzerbaijan                        3.210e+01        NaN     NaN      NaN
## CountryBahamas                           3.920e+01        NaN     NaN      NaN
## CountryBahrain                           3.250e+01        NaN     NaN      NaN
## CountryBangladesh                        2.320e+01        NaN     NaN      NaN
## CountryBarbados                          4.180e+01        NaN     NaN      NaN
## CountryBelarus                           2.350e+01        NaN     NaN      NaN
## CountryBelgium                           4.010e+01        NaN     NaN      NaN
## CountryBelize                            2.710e+01        NaN     NaN      NaN
## CountryBenin                             3.150e+01        NaN     NaN      NaN
## CountryBhutan                            2.980e+01        NaN     NaN      NaN
## CountryBolivia                           1.350e+01        NaN     NaN      NaN
## CountryBosnia and Herzegovina            3.390e+01        NaN     NaN      NaN
## CountryBotswana                          3.530e+01        NaN     NaN      NaN
## CountryBrazil                            2.380e+01        NaN     NaN      NaN
## CountryBrunei Darussalam                 3.530e+01        NaN     NaN      NaN
## CountryBulgaria                          4.150e+01        NaN     NaN      NaN
## CountryBurkina Faso                      2.880e+01        NaN     NaN      NaN
## CountryBurundi                           9.900e+00        NaN     NaN      NaN
## CountryCabo Verde                        3.720e+01        NaN     NaN      NaN
## CountryCambodia                          2.760e+01        NaN     NaN      NaN
## CountryCameroon                          2.340e+01        NaN     NaN      NaN
## CountryCanada                            4.710e+01        NaN     NaN      NaN
## CountryCentral African Republic          1.620e+01        NaN     NaN      NaN
## CountryChad                              2.030e+01        NaN     NaN      NaN
## CountryChile                             4.490e+01        NaN     NaN      NaN
## CountryChina                             1.850e+01        NaN     NaN      NaN
## CountryColombia                          3.560e+01        NaN     NaN      NaN
## CountryComoros                           2.090e+01        NaN     NaN      NaN
## CountryCongo, Dem. Rep.                  1.810e+01        NaN     NaN      NaN
## CountryCongo, Rep.                       1.900e+01        NaN     NaN      NaN
## CountryCosta Rica                        3.590e+01        NaN     NaN      NaN
## CountryCroatia                           3.810e+01        NaN     NaN      NaN
## CountryCuba                              4.724e-13        NaN     NaN      NaN
## CountryCyprus                            4.340e+01        NaN     NaN      NaN
## CountryCzech Republic                    4.490e+01        NaN     NaN      NaN
## CountryDenmark                           4.850e+01        NaN     NaN      NaN
## CountryDjibouti                          2.580e+01        NaN     NaN      NaN
## CountryDominica                          2.490e+01        NaN     NaN      NaN
## CountryDominican Republic                3.350e+01        NaN     NaN      NaN
## CountryEcuador                           2.480e+01        NaN     NaN      NaN
## CountryEgypt                             1.960e+01        NaN     NaN      NaN
## CountryEl Salvador                       3.010e+01        NaN     NaN      NaN
## CountryEquatorial Guinea                 1.770e+01        NaN     NaN      NaN
## CountryEritrea                           1.020e+01        NaN     NaN      NaN
## CountryEstonia                           5.050e+01        NaN     NaN      NaN
## CountryEswatini                          2.190e+01        NaN     NaN      NaN
## CountryEthiopia                          2.010e+01        NaN     NaN      NaN
## CountryFiji                              2.690e+01        NaN     NaN      NaN
## CountryFinland                           4.880e+01        NaN     NaN      NaN
## CountryFrance                            3.640e+01        NaN     NaN      NaN
## CountryGabon                             2.630e+01        NaN     NaN      NaN
## CountryGambia                            2.850e+01        NaN     NaN      NaN
## CountryGeorgia                           4.230e+01        NaN     NaN      NaN
## CountryGermany                           4.660e+01        NaN     NaN      NaN
## CountryGhana                             3.030e+01        NaN     NaN      NaN
## CountryGreece                            3.200e+01        NaN     NaN      NaN
## CountryGuatemala                         3.370e+01        NaN     NaN      NaN
## CountryGuinea                            2.470e+01        NaN     NaN      NaN
## CountryGuinea-Bissau                     1.650e+01        NaN     NaN      NaN
## CountryGuyana                            3.000e+01        NaN     NaN      NaN
## CountryHaiti                             2.050e+01        NaN     NaN      NaN
## CountryHonduras                          3.000e+01        NaN     NaN      NaN
## CountryHungary                           3.740e+01        NaN     NaN      NaN
## CountryIceland                           4.750e+01        NaN     NaN      NaN
## CountryIndia                             2.440e+01        NaN     NaN      NaN
## CountryIndonesia                         3.490e+01        NaN     NaN      NaN
## CountryIran                              1.290e+01        NaN     NaN      NaN
## CountryIraq                              2.340e+01        NaN     NaN      NaN
## CountryIreland                           5.250e+01        NaN     NaN      NaN
## CountryIsrael                            3.850e+01        NaN     NaN      NaN
## CountryItaly                             3.590e+01        NaN     NaN      NaN
## CountryJamaica                           3.790e+01        NaN     NaN      NaN
## CountryJapan                             4.040e+01        NaN     NaN      NaN
## CountryJordan                            3.060e+01        NaN     NaN      NaN
## CountryKazakhstan                        3.490e+01        NaN     NaN      NaN
## CountryKenya                             2.310e+01        NaN     NaN      NaN
## CountryKiribati                          2.970e+01        NaN     NaN      NaN
## CountryKorea, North                     -2.650e+01        NaN     NaN      NaN
## CountryKorea, South                      4.510e+01        NaN     NaN      NaN
## CountryKuwait                            2.880e+01        NaN     NaN      NaN
## CountryKyrgyzstan                        2.630e+01        NaN     NaN      NaN
## CountryLao P.D.R.                        1.970e+01        NaN     NaN      NaN
## CountryLatvia                            4.530e+01        NaN     NaN      NaN
## CountryLebanon                           1.780e+01        NaN     NaN      NaN
## CountryLesotho                           1.860e+01        NaN     NaN      NaN
## CountryLiberia                           1.840e+01        NaN     NaN      NaN
## CountryLibya                             4.230e+01        NaN     NaN      NaN
## CountryLithuania                         4.630e+01        NaN     NaN      NaN
## CountryLuxembourg                        5.110e+01        NaN     NaN      NaN
## CountryMacedonia                         3.620e+01        NaN     NaN      NaN
## CountryMadagascar                        2.940e+01        NaN     NaN      NaN
## CountryMalawi                            2.350e+01        NaN     NaN      NaN
## CountryMalaysia                          3.860e+01        NaN     NaN      NaN
## CountryMaldives                          1.780e+01        NaN     NaN      NaN
## CountryMali                              2.640e+01        NaN     NaN      NaN
## CountryMalta                             4.200e+01        NaN     NaN      NaN
## CountryMauritania                        2.580e+01        NaN     NaN      NaN
## CountryMauritius                         4.140e+01        NaN     NaN      NaN
## CountryMexico                            3.420e+01        NaN     NaN      NaN
## CountryMicronesia                        3.150e+01        NaN     NaN      NaN
## CountryMoldova                           3.180e+01        NaN     NaN      NaN
## CountryMongolia                          3.440e+01        NaN     NaN      NaN
## CountryMontenegro                        2.830e+01        NaN     NaN      NaN
## CountryMorocco                           2.970e+01        NaN     NaN      NaN
## CountryMozambique                        2.180e+01        NaN     NaN      NaN
## CountryMyanmar                           2.010e+01        NaN     NaN      NaN
## CountryNamibia                           2.970e+01        NaN     NaN      NaN
## CountryNepal                             2.020e+01        NaN     NaN      NaN
## CountryNetherlands                       5.000e+01        NaN     NaN      NaN
## CountryNew Zealand                       5.110e+01        NaN     NaN      NaN
## CountryNicaragua                         2.530e+01        NaN     NaN      NaN
## CountryNiger                             2.540e+01        NaN     NaN      NaN
## CountryNigeria                           2.490e+01        NaN     NaN      NaN
## CountryNorway                            4.740e+01        NaN     NaN      NaN
## CountryOman                              2.710e+01        NaN     NaN      NaN
## CountryPakistan                          1.930e+01        NaN     NaN      NaN
## CountryPanama                            3.590e+01        NaN     NaN      NaN
## CountryPapua New Guinea                  2.510e+01        NaN     NaN      NaN
## CountryParaguay                          3.340e+01        NaN     NaN      NaN
## CountryPeru                              3.700e+01        NaN     NaN      NaN
## CountryPhilippines                       3.160e+01        NaN     NaN      NaN
## CountryPoland                            3.920e+01        NaN     NaN      NaN
## CountryPortugal                          4.130e+01        NaN     NaN      NaN
## CountryQatar                             3.820e+01        NaN     NaN      NaN
## CountryRomania                           3.760e+01        NaN     NaN      NaN
## CountryRussia                            2.660e+01        NaN     NaN      NaN
## CountryRwanda                            2.760e+01        NaN     NaN      NaN
## CountrySaint Lucia                       3.480e+01        NaN     NaN      NaN
## CountrySaint Vincent and the Grenadines  3.620e+01        NaN     NaN      NaN
## CountrySamoa                             3.880e+01        NaN     NaN      NaN
## CountrySaudi Arabia                      2.600e+01        NaN     NaN      NaN
## CountrySenegal                           3.050e+01        NaN     NaN      NaN
## CountrySerbia                            3.570e+01        NaN     NaN      NaN
## CountrySeychelles                        3.160e+01        NaN     NaN      NaN
## CountrySierra Leone                      2.250e+01        NaN     NaN      NaN
## CountrySingapore                         5.490e+01        NaN     NaN      NaN
## CountrySlovakia                          4.020e+01        NaN     NaN      NaN
## CountrySlovenia                          4.100e+01        NaN     NaN      NaN
## CountrySolomon Islands                   2.700e+01        NaN     NaN      NaN
## CountrySomalia                           3.150e+01        NaN     NaN      NaN
## CountrySouth Africa                      2.670e+01        NaN     NaN      NaN
## CountrySpain                             3.870e+01        NaN     NaN      NaN
## CountrySri Lanka                         2.380e+01        NaN     NaN      NaN
## CountrySudan                             2.500e+00        NaN     NaN      NaN
## CountrySuriname                          1.860e+01        NaN     NaN      NaN
## CountrySweden                            4.840e+01        NaN     NaN      NaN
## CountrySwitzerland                       5.470e+01        NaN     NaN      NaN
## CountrySyria                             2.460e+01        NaN     NaN      NaN
## CountryTajikistan                        2.020e+01        NaN     NaN      NaN
## CountryTanzania                          3.000e+01        NaN     NaN      NaN
## CountryThailand                          3.370e+01        NaN     NaN      NaN
## CountryTimor-Leste                       1.680e+01        NaN     NaN      NaN
## CountryTogo                              2.770e+01        NaN     NaN      NaN
## CountryTonga                             3.130e+01        NaN     NaN      NaN
## CountryTrinidad and Tobago               2.930e+01        NaN     NaN      NaN
## CountryTunisia                           2.470e+01        NaN     NaN      NaN
## CountryTurkey                            2.740e+01        NaN     NaN      NaN
## CountryTurkmenistan                      1.670e+01        NaN     NaN      NaN
## CountryUganda                            2.470e+01        NaN     NaN      NaN
## CountryUkraine                           2.460e+01        NaN     NaN      NaN
## CountryUnited Arab Emirates              4.070e+01        NaN     NaN      NaN
## CountryUnited Kingdom                    4.320e+01        NaN     NaN      NaN
## CountryUnited States                     4.260e+01        NaN     NaN      NaN
## CountryUruguay                           4.050e+01        NaN     NaN      NaN
## CountryUzbekistan                        2.620e+01        NaN     NaN      NaN
## CountryVanuatu                           3.340e+01        NaN     NaN      NaN
## CountryVenezuela                        -4.700e+00        NaN     NaN      NaN
## CountryVietnam                           3.110e+01        NaN     NaN      NaN
## CountryYemen                             1.860e+01        NaN     NaN      NaN
## CountryZambia                            1.920e+01        NaN     NaN      NaN
## CountryZimbabwe                          3.600e+00        NaN     NaN      NaN
## Life_Expentancy                                 NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 178 and 0 DF,  p-value: NA
# Boxplot of the residuals stored in resid2. They are placed in a column that
# is actually called `resid2`, so aes() resolves the name inside the plotted
# data frame instead of falling back to the global variable of the same name.
ggplot(data.frame(resid2 = resid2), aes(x = resid2)) +
  geom_boxplot(fill = "lightblue")

# Positions of the observations whose residual exceeds 19 in absolute value.
which(abs(resid2) > 19)
##  41  75  86 152 176 
##  41  75  86 152 175

We have seen several ways to identify the outliers in this variable, and in general the different methods flag the same observations. We could remove these extreme values to reduce noise, but since I don’t want to drop more rows from our dataset, I believe the best option is to discretize the variable instead.

Discretize the variable

# Discretize the score into the same five categories used by the website the
# data comes from: https://www.heritage.org/index/ranking
#
# cut() classifies the numeric scores in a single pass. A chain of subset
# assignments would turn the column into character after the first step, so
# every later comparison would be a string comparison, and scores falling
# between two ranges (e.g. 79.95) would match none of them.
# Intervals are closed on the left:
#   [80, Inf) Free, [70, 80) Mostly Free, [60, 70) Moderately Free,
#   [50, 60) Mostly Unfree, (-Inf, 50) Repressed.
data$Economic_freedom <- as.character(cut(
  data$Economic_freedom,
  breaks = c(-Inf, 50, 60, 70, 80, Inf),
  labels = c("Repressed", "Mostly Unfree", "Moderately Free",
             "Mostly Free", "Free"),
  right = FALSE
))
# The result is kept as a character vector, as before.

# Number of countries in each category:
table(data[["Economic_freedom"]])
## 
##            Free Moderately Free     Mostly Free   Mostly Unfree       Repressed 
##               6              52              28              59              34
# Share of countries in each category:
prop.table(table(data[["Economic_freedom"]]))
## 
##            Free Moderately Free     Mostly Free   Mostly Unfree       Repressed 
##      0.03351955      0.29050279      0.15642458      0.32960894      0.18994413
# Turn the labels into a factor whose levels run from most to least free:
freedom_levels <- c("Free", "Mostly Free", "Moderately Free",
                    "Mostly Unfree", "Repressed")
data$Economic_freedom <- factor(data$Economic_freedom, levels = freedom_levels)

Visualization: interesting graphs

# Let's make some graphs to better understand the relations between variables:

# GNI per capita vs life expectancy, one panel per economic-freedom level,
# with points coloured by unemployment and a linear fit in each panel.
# The x axis keeps its square-root scale but uses the default breaks: the
# percentage-style breaks 0.05 / 0.1 (labelled "5%" / "10%") do not belong on
# a life-expectancy axis and would leave it without tick labels
# (assuming Life_Expentancy is measured in years -- TODO confirm).
ggplot(data, aes(x = Life_Expentancy, y = GNI_Capita_PPP,
                 group = Economic_freedom, color = Unemployment)) +
  scale_x_sqrt() +
  geom_point(alpha = 0.5) +
  geom_smooth(method = lm, se = FALSE, formula = y ~ x) +
  facet_wrap(~ Economic_freedom) +
  scale_color_gradient(low = "green", high = "red") +
  theme_gray() +
  labs(title = "GNI per capita PPP vs Life Expentancy",
       caption = "Alvaro Martin",
       x = "", y = "")

# More developed countries (which we have seen are the freer ones) emit more CO2:
ggplot(data, aes(y = CO2, fill = Economic_freedom)) +
  geom_boxplot() +
  facet_wrap(~ Economic_freedom) +
  theme(legend.position = "none")

# The most developed countries are responsible for most of the emissions.

# In poor countries, women have more children:
ggplot(data, aes(x = GDP_capita_PPP, y = Fertility)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# The number of cell phones and the number of internet users are related:
ggplot(data, aes(x = Internet_users, y = Cell_phones)) +
  geom_quantile() +
  geom_point()
## Smoothing formula not specified. Using: y ~ x

# Let's check it by computing the Pearson correlation.
# The method is named explicitly: passing the whole vector of choices
# c("pearson", "kendall", "spearman") only selects its first element, which
# hides which coefficient is actually being computed.
cor(data$Cell_phones, data$Internet_users, method = "pearson")
## [1] 0.7966409
# Another pair of related variables: adult vs infant mortality, drawn with a
# regression line, its confidence band and the Pearson coefficient.
ggscatter(
  data,
  x = "Adult_Mortality", xlab = "Adult_Mortality",
  y = "Infant_Mortality", ylab = "Infant_Mortality",
  add = "reg.line", conf.int = TRUE,
  cor.coef = TRUE, cor.method = "pearson"
)
## `geom_smooth()` using formula 'y ~ x'

# Democracy by region
ggplot(data, aes(y = Democracy, fill = Region)) +
  geom_bar() +
  facet_wrap(~ Region)

# Europe and America are the continents with the most advanced democracies.


# How many countries do we have from each region? (pie chart)
ggplot(data, aes(x = "", y = "", fill = Region)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)

# Mean education-equality rate for each economic-freedom category.
# One mean is computed per level, in the same order as the labels in `group`.
value <- vapply(
  c("Free", "Mostly Free", "Moderately Free", "Mostly Unfree", "Repressed"),
  function(level) {
    mean(data$Education_Equality[which(data$Economic_freedom == level)])
  },
  numeric(1),
  USE.NAMES = FALSE
)
group <- c("Free", "Mostly-Free", "Moderately-Free", "Mostly-Unfree", "Repressed")
df <- data.frame(group, value)
ggplot(df, aes(group, value)) +
  geom_linerange(aes(x = group, ymin = 0.8, ymax = value),
                 color = "lightgray", size = 1.5) +
  geom_point(aes(color = group), size = 3) +
  ggpubr::color_palette("jco") +
  theme_pubclean() +
  theme(legend.position = "none")

# Freer countries are more egalitarian.

# Business freedom by region: violin plot with a dot plot drawn on top.
ggplot(data, aes(x = Region, y = Business_Freedom, fill = Region)) +
  geom_violin(scale = "area") +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.5)
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.

# Under-five mortality is extremely high in sub-Saharan countries
# (the regions on the x axis are ordered with reorder() by Under5_mortality):
ggplot(data, aes(x = reorder(Region, Under5_mortality),
                 y = Under5_mortality, fill = Region)) +
  geom_boxplot()

# Property rights vs judicial effectiveness, one panel per region:
ggplot(data, aes(x = Judical_Effectiveness, y = Property_Rights)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~ Region) +
  theme(legend.position = "none")

# Property rights and judicial effectiveness are related.
# In this density chart, a lighter blue indicates a higher density.
# The density chart shows both the correlation between the variables and where
# the values are most concentrated. For instance, in Europe most of the values
# are in the top-right part of the chart (high judicial effectiveness and
# property rights).
# PCA:
# PCA needs every variable to be numeric, so the Economic_freedom column of
# data_imp is used here (presumably still numeric there -- TODO confirm)
# together with columns 2-14 and 16-19 of data: 18 variables in total.
Economic_Freedom <- data_imp$Economic_freedom
data_num <- cbind(data[, c(2:14, 16:19)], Economic_Freedom)

# Raw variables, to compare their ranges:
boxplot(data_num, las = 2, col = "darkblue")

# To scale or not to scale?
boxplot(scale(data_num), las = 2, col = "darkblue")

# In our case it seems clear that we need to scale.



# Correlation between every pair of variables:
ggcorr(data_num, label = TRUE)

# This lets us check that some of the relations between variables that we
# previously studied through graphs were correct
# (adult and infant mortality, property rights and judicial effectiveness, etc.).


# Now we run PCA on the 18 variables of data_num, scaling them first.
# `scale.` is the full argument name of prcomp() (the abbreviated `scale` only
# works through partial matching) and TRUE replaces the reassignable T.
pca <- prcomp(data_num, scale. = TRUE)
# Alternative: princomp(data_num, cor = TRUE), which works from the eigen
# decomposition of the correlation matrix, whereas prcomp() uses the SVD.
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.1514 1.4017 1.24061 1.01963 0.99543 0.81557 0.64431
## Proportion of Variance 0.5517 0.1092 0.08551 0.05776 0.05505 0.03695 0.02306
## Cumulative Proportion  0.5517 0.6609 0.74639 0.80415 0.85920 0.89615 0.91922
##                           PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.5401 0.52120 0.44410 0.41664 0.37850 0.35141 0.31566
## Proportion of Variance 0.0162 0.01509 0.01096 0.00964 0.00796 0.00686 0.00554
## Cumulative Proportion  0.9354 0.95051 0.96147 0.97111 0.97907 0.98593 0.99147
##                           PC15    PC16    PC17    PC18
## Standard deviation     0.27072 0.26127 0.08250 0.07234
## Proportion of Variance 0.00407 0.00379 0.00038 0.00029
## Cumulative Proportion  0.99554 0.99933 0.99971 1.00000
# Another view of the same structure: the eigen decomposition of the
# correlation matrix.
R <- cor(data_num)   # correlation matrix
eigen(R)
## eigen() decomposition
## $values
##  [1] 9.931137710 1.964831729 1.539125347 1.039642132 0.990873170 0.665147710
##  [7] 0.415129210 0.291655141 0.271653579 0.197225728 0.173592002 0.143265069
## [13] 0.123486262 0.099640990 0.073291301 0.068263840 0.006805486 0.005233594
## 
## $vectors
##               [,1]        [,2]         [,3]         [,4]        [,5]
##  [1,]  0.257670000 -0.28386644  0.132511618  0.060474260  0.02113734
##  [2,] -0.261019809 -0.10674269  0.317686994 -0.036159733  0.17791478
##  [3,]  0.255273417 -0.21190446  0.009050281 -0.136595989  0.28799217
##  [4,]  0.273545877 -0.28667851  0.121323767 -0.015365822  0.15796410
##  [5,] -0.286217833  0.22102565 -0.052976365  0.083638045 -0.18223484
##  [6,]  0.264642470 -0.32262191  0.143422126  0.009209958  0.17020140
##  [7,] -0.170392860  0.05201517  0.526339343 -0.264099981 -0.02539185
##  [8,] -0.277167904 -0.03758093  0.140230445 -0.024794451  0.21853109
##  [9,]  0.039842021  0.40710942  0.198372789  0.428136361  0.56855485
## [10,] -0.280835392 -0.14423675 -0.116035068 -0.048055691 -0.13181052
## [11,] -0.268324610 -0.14583993  0.147972773  0.094300558  0.18159978
## [12,] -0.136913211 -0.25254999 -0.391516363  0.440356711  0.23317683
## [13,] -0.253865311 -0.25518921 -0.188442001  0.053668496  0.21816029
## [14,] -0.273005474 -0.24795105 -0.057229899  0.003271300  0.09367161
## [15,] -0.007836825  0.10979334 -0.331813876 -0.701768262  0.45036621
## [16,] -0.265032602 -0.10122506  0.343577890 -0.069749948  0.06315639
## [17,] -0.156584284  0.30345203 -0.196863902 -0.065425276  0.19412563
## [18,] -0.250339292 -0.33362261 -0.113824569 -0.075928318 -0.14345286
##               [,6]         [,7]         [,8]        [,9]       [,10]
##  [1,] -0.025418577  0.029546915 -0.523509471  0.02698708 -0.16574794
##  [2,] -0.045546537 -0.047491845 -0.042712867  0.39827824 -0.30385001
##  [3,]  0.282480628 -0.048894507  0.466020174 -0.11138144  0.20705128
##  [4,]  0.072399929 -0.036781257 -0.067011632  0.05641391 -0.12243970
##  [5,] -0.184566415  0.030420899 -0.234926103  0.09054251 -0.04643238
##  [6,]  0.054451631 -0.028820763 -0.074446717  0.05499467 -0.11406407
##  [7,]  0.167690046  0.530603641 -0.162936424 -0.39872988  0.20045182
##  [8,] -0.067452839  0.146619907  0.224911409  0.05243396  0.23841552
##  [9,] -0.164932845 -0.181724690  0.006781735 -0.40545438 -0.19322089
## [10,]  0.014867197 -0.178655996  0.318382476 -0.22748244 -0.33171337
## [11,] -0.122091114 -0.196586692  0.084182071  0.37326185  0.44672525
## [12,] -0.005194540  0.669325617  0.075022652  0.08446032 -0.12742421
## [13,]  0.009801686 -0.170702717 -0.392510688 -0.15669527  0.33620321
## [14,]  0.088416410 -0.303660057 -0.178277632 -0.27088157  0.04064692
## [15,] -0.344926767  0.104148625 -0.098424976  0.02565909 -0.14467749
## [16,]  0.055020548 -0.003606066  0.126722028  0.15973214 -0.39696847
## [17,]  0.817559902 -0.046389489 -0.190047891  0.18831822 -0.10600375
## [18,]  0.043199676 -0.048873038  0.052363733 -0.36046412 -0.20626694
##             [,11]       [,12]        [,13]       [,14]       [,15]       [,16]
##  [1,]  0.01047177 -0.37744098  0.548178989  0.20768532 -0.18756116 -0.06273718
##  [2,] -0.19599888 -0.26859869 -0.176814095 -0.03071274  0.55176453 -0.28388114
##  [3,] -0.21225652 -0.40376265  0.026883022 -0.09214541 -0.04669883 -0.06662766
##  [4,]  0.13243664  0.41809289 -0.173661497  0.11144936  0.11562853  0.12875187
##  [5,]  0.02206691  0.03827488  0.003833234  0.01562716 -0.01743323  0.02104828
##  [6,]  0.15927375  0.38151866 -0.109604595  0.07912200  0.09518255  0.08704394
##  [7,] -0.21092237  0.10643343 -0.065002767  0.15670463  0.01618362 -0.02249547
##  [8,]  0.77435484 -0.10079168  0.277429706  0.08394239  0.14980646  0.00619606
##  [9,] -0.07991078  0.05657374  0.110903930 -0.02934633  0.03371643  0.02863997
## [10,] -0.10579883 -0.02716694  0.068856739  0.72073570  0.04314080  0.18256212
## [11,] -0.38810861  0.35333907  0.339431751  0.10930211 -0.19748002 -0.01344081
## [12,] -0.11009151  0.02301842 -0.075818152  0.02817308 -0.10272989 -0.09348576
## [13,] -0.03921756 -0.25764932 -0.280599234 -0.03487717  0.16625816  0.53421326
## [14,]  0.17030944  0.04687785 -0.284226001  0.02607905 -0.32977903 -0.65060087
## [15,] -0.05979428  0.07734440  0.055652677  0.01935091 -0.08657902 -0.02637891
## [16,]  0.07262592 -0.10209993 -0.129929638 -0.28935751 -0.58375662  0.36421183
## [17,]  0.03631152  0.11933305  0.171867281  0.05019239  0.02742122  0.02542031
## [18,] -0.08873815  0.22475321  0.443952943 -0.52471398  0.27348331  0.01296648
##              [,17]        [,18]
##  [1,] -0.046488149 -0.009987675
##  [2,] -0.002678718  0.040946536
##  [3,] -0.036322639 -0.456492433
##  [4,] -0.700471668 -0.135250566
##  [5,] -0.043652817 -0.849009982
##  [6,]  0.707238947 -0.211806259
##  [7,]  0.006002004 -0.007558793
##  [8,] -0.034345938 -0.006984421
##  [9,] -0.006794095 -0.004089904
## [10,]  0.023526060 -0.003893175
## [11,] -0.010762926  0.038807822
## [12,] -0.010029600  0.017158024
## [13,]  0.015796409  0.049106822
## [14,] -0.019381040 -0.003233845
## [15,]  0.011018207  0.005824470
## [16,] -0.006146502  0.025423657
## [17,]  0.016820195  0.002259013
## [18,] -0.025696349 -0.028638351
# How many components do we keep? The scree plot shows the share of variance
# explained by each component.
fviz_screeplot(pca, addlabels = TRUE)

# With one component we can explain 55% of the variance of our data, with
# two components about 66%, and with three components almost 75%.

# Loadings of the first component:
pc1_loadings <- pca$rotation[, 1]
barplot(pc1_loadings, las = 2, col = "darkblue")

# Squared loadings are easier to interpret than the raw loadings: they are
# numbers between 0 and 1, so they can be read like percentages.
# They are called the contributions of the variables to the component,
# so let's plot them instead:
fviz_contrib(pca, choice = "var", axes = 1)

# The red dashed line on the graph above indicates the expected average contribution 

# Now we can rank the countries by their first PC scores:
names <- data[, 1]

# The best 10 (highest PC1 scores, listed in increasing order). tail() is used
# because the index range (length(names)-10):length(names) spans 11 positions.
tail(names[order(pca$x[, 1])], 10)
##  [1] "Denmark"       "United States" "Singapore"     "Sweden"       
##  [5] "Australia"     "Ireland"       "Netherlands"   "Switzerland"  
##  [9] "Norway"        "Luxembourg"
# Another way (it doesn't give exactly the same result, but a very similar one)
# to see the best 10 countries according to the first principal component.
#
# calculateScore() takes one row of values (`data`), multiplies it element-wise
# by the first-component loadings of the global `pca` object and returns the
# sum of the squared products.
# NOTE(review): the products are squared and the rows passed in by the call
# below come from the unscaled data_num, so this is not the PC1 score stored
# in pca$x -- confirm that this is the intended measure.
calculateScore <- function(data) {
  weighted <- pca$rotation[, 1] * data
  sum(weighted^2)
}
# Score every row of data_num, sort the scores in decreasing order and show
# the countries at the first 10 positions.
alt_scores <- apply(data_num, 1, calculateScore)
alt_ranking <- sort.int(alt_scores, decreasing = TRUE, index.return = TRUE)
data$Country[alt_ranking$ix[1:10]]
##  [1] "Luxembourg"        "Singapore"         "Ireland"          
##  [4] "Qatar"             "Switzerland"       "Norway"           
##  [7] "Brunei Darussalam" "United States"     "Denmark"          
## [10] "Netherlands"
# The worst 10 (lowest PC1 scores):
names[order(pca$x[, 1])[1:10]]
##  [1] "Afghanistan"              "Sierra Leone"            
##  [3] "Chad"                     "Congo, Dem. Rep."        
##  [5] "Niger"                    "Central African Republic"
##  [7] "Guinea-Bissau"            "Liberia"                 
##  [9] "Burundi"                  "Angola"
# Now, let's look at the loadings of the second component:
pc2_loadings <- pca$rotation[, 2]
barplot(pc2_loadings, las = 2, col = "lawngreen")

# Contribution of the variables to the second component:
fviz_contrib(pca, choice = "var", axes = 2)

# Now we can rank the countries by their second PC scores.
# The 10 lowest scores:
names[order(pca$x[, 2])[1:10]] # Countries with high infant mortality
##  [1] "Sierra Leone"  "Niger"         "Mali"          "Somalia"      
##  [5] "Burkina Faso"  "Guinea-Bissau" "Chad"          "Angola"       
##  [9] "Benin"         "Liberia"
# The 10 countries with the highest second PC scores (in increasing order).
# tail() is used because the index range (length(names)-10):length(names)
# spans 11 positions.
tail(names[order(pca$x[, 2])], 10)
##  [1] "Uzbekistan"    "China"         "Algeria"       "Syria"        
##  [5] "Belarus"       "Turkmenistan"  "Iran"          "Cuba"         
##  [9] "Venezuela"     "Korea, North "

PCA

First look:

# PCA needs every variable to be numeric, so the Economic_freedom column of
# data_imp is used here (presumably still numeric there -- TODO confirm)
# together with columns 2-14 and 16-19 of data.
Economic_Freedom <- data_imp$Economic_freedom
data_num <- cbind(data[, c(2:14, 16:19)], Economic_Freedom)

# We need to scale: compare the raw variables with the scaled ones.
boxplot(data_num, las = 2, col = "darkblue")

boxplot(scale(data_num), las = 2, col = "darkblue")

# Correlation between every pair of variables:
ggcorr(data_num, label = TRUE)

# This lets us check that some of the relations between variables that we
# previously studied through graphs were correct
# (adult and infant mortality, property rights and judicial effectiveness, etc.).


# Another view of the same structure: the eigen decomposition of the
# correlation matrix.
R <- cor(data_num)   # correlation matrix
eigen(R)
## eigen() decomposition
## $values
##  [1] 9.931137710 1.964831729 1.539125347 1.039642132 0.990873170 0.665147710
##  [7] 0.415129210 0.291655141 0.271653579 0.197225728 0.173592002 0.143265069
## [13] 0.123486262 0.099640990 0.073291301 0.068263840 0.006805486 0.005233594
## 
## $vectors
##               [,1]        [,2]         [,3]         [,4]        [,5]
##  [1,]  0.257670000 -0.28386644  0.132511618  0.060474260  0.02113734
##  [2,] -0.261019809 -0.10674269  0.317686994 -0.036159733  0.17791478
##  [3,]  0.255273417 -0.21190446  0.009050281 -0.136595989  0.28799217
##  [4,]  0.273545877 -0.28667851  0.121323767 -0.015365822  0.15796410
##  [5,] -0.286217833  0.22102565 -0.052976365  0.083638045 -0.18223484
##  [6,]  0.264642470 -0.32262191  0.143422126  0.009209958  0.17020140
##  [7,] -0.170392860  0.05201517  0.526339343 -0.264099981 -0.02539185
##  [8,] -0.277167904 -0.03758093  0.140230445 -0.024794451  0.21853109
##  [9,]  0.039842021  0.40710942  0.198372789  0.428136361  0.56855485
## [10,] -0.280835392 -0.14423675 -0.116035068 -0.048055691 -0.13181052
## [11,] -0.268324610 -0.14583993  0.147972773  0.094300558  0.18159978
## [12,] -0.136913211 -0.25254999 -0.391516363  0.440356711  0.23317683
## [13,] -0.253865311 -0.25518921 -0.188442001  0.053668496  0.21816029
## [14,] -0.273005474 -0.24795105 -0.057229899  0.003271300  0.09367161
## [15,] -0.007836825  0.10979334 -0.331813876 -0.701768262  0.45036621
## [16,] -0.265032602 -0.10122506  0.343577890 -0.069749948  0.06315639
## [17,] -0.156584284  0.30345203 -0.196863902 -0.065425276  0.19412563
## [18,] -0.250339292 -0.33362261 -0.113824569 -0.075928318 -0.14345286
##               [,6]         [,7]         [,8]        [,9]       [,10]
##  [1,] -0.025418577  0.029546915 -0.523509471  0.02698708 -0.16574794
##  [2,] -0.045546537 -0.047491845 -0.042712867  0.39827824 -0.30385001
##  [3,]  0.282480628 -0.048894507  0.466020174 -0.11138144  0.20705128
##  [4,]  0.072399929 -0.036781257 -0.067011632  0.05641391 -0.12243970
##  [5,] -0.184566415  0.030420899 -0.234926103  0.09054251 -0.04643238
##  [6,]  0.054451631 -0.028820763 -0.074446717  0.05499467 -0.11406407
##  [7,]  0.167690046  0.530603641 -0.162936424 -0.39872988  0.20045182
##  [8,] -0.067452839  0.146619907  0.224911409  0.05243396  0.23841552
##  [9,] -0.164932845 -0.181724690  0.006781735 -0.40545438 -0.19322089
## [10,]  0.014867197 -0.178655996  0.318382476 -0.22748244 -0.33171337
## [11,] -0.122091114 -0.196586692  0.084182071  0.37326185  0.44672525
## [12,] -0.005194540  0.669325617  0.075022652  0.08446032 -0.12742421
## [13,]  0.009801686 -0.170702717 -0.392510688 -0.15669527  0.33620321
## [14,]  0.088416410 -0.303660057 -0.178277632 -0.27088157  0.04064692
## [15,] -0.344926767  0.104148625 -0.098424976  0.02565909 -0.14467749
## [16,]  0.055020548 -0.003606066  0.126722028  0.15973214 -0.39696847
## [17,]  0.817559902 -0.046389489 -0.190047891  0.18831822 -0.10600375
## [18,]  0.043199676 -0.048873038  0.052363733 -0.36046412 -0.20626694
##             [,11]       [,12]        [,13]       [,14]       [,15]       [,16]
##  [1,]  0.01047177 -0.37744098  0.548178989  0.20768532 -0.18756116 -0.06273718
##  [2,] -0.19599888 -0.26859869 -0.176814095 -0.03071274  0.55176453 -0.28388114
##  [3,] -0.21225652 -0.40376265  0.026883022 -0.09214541 -0.04669883 -0.06662766
##  [4,]  0.13243664  0.41809289 -0.173661497  0.11144936  0.11562853  0.12875187
##  [5,]  0.02206691  0.03827488  0.003833234  0.01562716 -0.01743323  0.02104828
##  [6,]  0.15927375  0.38151866 -0.109604595  0.07912200  0.09518255  0.08704394
##  [7,] -0.21092237  0.10643343 -0.065002767  0.15670463  0.01618362 -0.02249547
##  [8,]  0.77435484 -0.10079168  0.277429706  0.08394239  0.14980646  0.00619606
##  [9,] -0.07991078  0.05657374  0.110903930 -0.02934633  0.03371643  0.02863997
## [10,] -0.10579883 -0.02716694  0.068856739  0.72073570  0.04314080  0.18256212
## [11,] -0.38810861  0.35333907  0.339431751  0.10930211 -0.19748002 -0.01344081
## [12,] -0.11009151  0.02301842 -0.075818152  0.02817308 -0.10272989 -0.09348576
## [13,] -0.03921756 -0.25764932 -0.280599234 -0.03487717  0.16625816  0.53421326
## [14,]  0.17030944  0.04687785 -0.284226001  0.02607905 -0.32977903 -0.65060087
## [15,] -0.05979428  0.07734440  0.055652677  0.01935091 -0.08657902 -0.02637891
## [16,]  0.07262592 -0.10209993 -0.129929638 -0.28935751 -0.58375662  0.36421183
## [17,]  0.03631152  0.11933305  0.171867281  0.05019239  0.02742122  0.02542031
## [18,] -0.08873815  0.22475321  0.443952943 -0.52471398  0.27348331  0.01296648
##              [,17]        [,18]
##  [1,] -0.046488149 -0.009987675
##  [2,] -0.002678718  0.040946536
##  [3,] -0.036322639 -0.456492433
##  [4,] -0.700471668 -0.135250566
##  [5,] -0.043652817 -0.849009982
##  [6,]  0.707238947 -0.211806259
##  [7,]  0.006002004 -0.007558793
##  [8,] -0.034345938 -0.006984421
##  [9,] -0.006794095 -0.004089904
## [10,]  0.023526060 -0.003893175
## [11,] -0.010762926  0.038807822
## [12,] -0.010029600  0.017158024
## [13,]  0.015796409  0.049106822
## [14,] -0.019381040 -0.003233845
## [15,]  0.011018207  0.005824470
## [16,] -0.006146502  0.025423657
## [17,]  0.016820195  0.002259013
## [18,] -0.025696349 -0.028638351

Creation of the principal components:

# PCA on the 18 numeric variables of data_num; below we mostly interpret the
# first two components.
# The variables are standardised before the decomposition. The prcomp()
# argument is `scale.` (with a trailing dot); it is spelled out in full
# instead of relying on partial matching of `scale`.
pca <- prcomp(data_num, scale. = TRUE)
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.1514 1.4017 1.24061 1.01963 0.99543 0.81557 0.64431
## Proportion of Variance 0.5517 0.1092 0.08551 0.05776 0.05505 0.03695 0.02306
## Cumulative Proportion  0.5517 0.6609 0.74639 0.80415 0.85920 0.89615 0.91922
##                           PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.5401 0.52120 0.44410 0.41664 0.37850 0.35141 0.31566
## Proportion of Variance 0.0162 0.01509 0.01096 0.00964 0.00796 0.00686 0.00554
## Cumulative Proportion  0.9354 0.95051 0.96147 0.97111 0.97907 0.98593 0.99147
##                           PC15    PC16    PC17    PC18
## Standard deviation     0.27072 0.26127 0.08250 0.07234
## Proportion of Variance 0.00407 0.00379 0.00038 0.00029
## Cumulative Proportion  0.99554 0.99933 0.99971 1.00000

How many components?

fviz_screeplot(pca, addlabels = TRUE)

# With one component we can explain 55% of the variance of our data.
# With two, about 66%.

# First component:
# Loadings of every variable on PC1.
barplot(pca$rotation[, 1], las = 2, col = "darkblue")

# The squared loadings are easier to read than the raw loadings.
# They are called the contribution of the variables to the component.
fviz_contrib(pca, choice = "var", axes = 1)

# The first component gives more importance to the variables Life Expectancy, Cell phones, Business Freedom... to classify the countries.
# With this, we can guess that it is going to classify the countries by their quality-of-life level.

# Now we can rank the countries by their first PC scores:
names <- data[, 1]

# The best: the 10 highest PC1 scores. tail(., 10) returns exactly 10
# countries; the previous index (length(names)-10):length(names) returned 11.
tail(names[order(pca$x[, 1])], 10)
##  [1] "Denmark"       "United States" "Singapore"     "Sweden"       
##  [5] "Australia"     "Ireland"       "Netherlands"   "Switzerland"  
##  [9] "Norway"        "Luxembourg"
# The worst: the 10 lowest PC1 scores
names[order(pca$x[, 1])][1:10]
##  [1] "Afghanistan"              "Sierra Leone"            
##  [3] "Chad"                     "Congo, Dem. Rep."        
##  [5] "Niger"                    "Central African Republic"
##  [7] "Guinea-Bissau"            "Liberia"                 
##  [9] "Burundi"                  "Angola"

Now, the second component:

# Now, let's look at the second component (loadings of every variable on PC2):
barplot(pca$rotation[, 2], las = 2, col = "lawngreen")

# Contribution of variables to second component
fviz_contrib(pca, choice = "var", axes = 2)

# In this case, business freedom and cell phones, which were very important for the first component, are among the ones with less importance for the second component.

# Now we can rank the countries by their second PC scores:
# the 10 lowest PC2 scores (countries with high infant mortality)
names[order(pca$x[, 2])][1:10]
##  [1] "Sierra Leone"  "Niger"         "Mali"          "Somalia"      
##  [5] "Burkina Faso"  "Guinea-Bissau" "Chad"          "Angola"       
##  [9] "Benin"         "Liberia"
# the 10 highest PC2 scores. tail(., 10) returns exactly 10 countries; the
# previous index (length(names)-10):length(names) returned 11.
tail(names[order(pca$x[, 2])], 10)
##  [1] "Uzbekistan"    "China"         "Algeria"       "Syria"        
##  [5] "Belarus"       "Turkmenistan"  "Iran"          "Cuba"         
##  [9] "Venezuela"     "Korea, North "
# Once we have interpreted the meaning of the first two components, let's see the contribution of each country to the components
head(get_pca_ind(pca)$contrib[, 1]) # this is in %
##           1           2           3           4           5           6 
## 1.981209531 0.041269022 0.044932225 1.593738153 0.023609652 0.001467308
# Same quantity computed by hand: squared score over the component variance,
# divided by the number of countries.
head((pca$x[, 1]^2) / (pca$sdev[1]^2)) / dim(data_num)[1] # between 0 and 1
##            1            2            3            4            5            6 
## 1.981210e-02 4.126902e-04 4.493222e-04 1.593738e-02 2.360965e-04 1.467308e-05
# Countries contributions to first component:
# The top 50 contributions
fviz_contrib(pca, choice = "ind", axes = 1, top = 50)

# All contributions
fviz_contrib(pca, choice = "ind", axes = 1)

# The top 10 countries that contribute to the first component
names[order(get_pca_ind(pca)$contrib[, 1], decreasing = TRUE)][1:10]
##  [1] "Luxembourg"               "Afghanistan"             
##  [3] "Sierra Leone"             "Norway"                  
##  [5] "Chad"                     "Congo, Dem. Rep."        
##  [7] "Niger"                    "Central African Republic"
##  [9] "Guinea-Bissau"            "Switzerland"
# Finally, let's zoom in on the top-30 countries by contribution and
# label the bars with the country names so the plot is clearer.
names_z1 <- names[order(get_pca_ind(pca)$contrib[, 1], decreasing = TRUE)]
fviz_contrib(pca, choice = "ind", axes = 1, top = 30) +
  scale_x_discrete(labels = names_z1)

Biplot

# Countries and variables drawn together, with the first two components
# as axes.
biplot(pca)

# Variables close to the center of the graph do not contribute much to
# either of the two PCs, whereas variables in the corners are the most
# significant for both principal components.

# Drop the countries from the picture so the contribution of each variable
# is easier to see.
pca %>% fviz_pca_var(col.var = "contrib")

# A different look at the same biplot:
pca %>% fviz_pca_biplot(repel = TRUE)
## Warning: ggrepel: 129 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Scores:

# Countries on the first two components. The x coordinate is PC1 with its
# sign flipped; points have size 0, so only the country labels are visible.
ggplot(
  data.frame(z1 = -pca$x[, 1], z2 = pca$x[, 2]),
  aes(z1, z2, label = names)
) +
  geom_point(size = 0) +
  labs(title = "PCA", x = "PC1", y = "PC2") +
  theme_bw() +
  scale_color_gradient(low = "grey", high = "black") +
  theme(legend.position = "bottom") +
  geom_text(size = 2, hjust = 0.6, vjust = 0, check_overlap = TRUE)

# The first two PCs are always uncorrelated.
# Colouring the labels by a variable shows how it relates to the components:
ggplot(
  data.frame(z1 = -pca$x[, 1], z2 = pca$x[, 2]),
  aes(z1, z2, label = names, color = data_num$Life_Expentancy)
) +
  geom_point(size = 0) +
  labs(title = "PCA", x = "PC1", y = "PC2") +
  theme_bw() +
  scale_color_gradient(low = "lightblue", high = "darkblue") +
  theme(legend.position = "bottom") +
  geom_text(size = 2, hjust = 0.6, vjust = 0, check_overlap = TRUE)

# We can see that the first component is highly correlated with Life Expectancy.

# PC1 is also somewhat related to GNI per capita PPP:
ggplot(
  data.frame(z1 = -pca$x[, 1], z2 = pca$x[, 2]),
  aes(z1, z2, label = names, color = data_num$GNI_Capita_PPP)
) +
  geom_point(size = 0) +
  labs(title = "PCA", x = "PC1", y = "PC2") +
  theme_bw() +
  scale_color_gradient(low = "lightblue", high = "darkblue") +
  theme(legend.position = "bottom") +
  geom_text(size = 2, hjust = 0.6, vjust = 0, check_overlap = TRUE)

# Relation between PC1 and the number of cell phones:
ggplot(
  data.frame(z1 = -pca$x[, 1], z2 = pca$x[, 2]),
  aes(z1, z2, label = names, color = data_num$Cell_phones)
) +
  geom_point(size = 0) +
  labs(title = "PCA", x = "PC1", y = "PC2") +
  theme_bw() +
  scale_color_gradient(low = "grey", high = "black") +
  theme(legend.position = "bottom") +
  geom_text(size = 2, hjust = 0.6, vjust = 0, check_overlap = TRUE)

# Which regions have, on average, the best countries to live in?
# PC1 is used with its original sign: in the ranking by pca$x[, 1] above, the
# most developed countries have the HIGHEST scores, so sorting the regional
# means in decreasing order puts the best region first. (The previous version
# averaged -PC1, which listed the worst region first.)
region <- data[, 20]
data.frame(z1 = pca$x[, 1], region = region) %>%
  group_by(region) %>%
  summarise(mean = mean(z1)) %>%
  arrange(desc(mean))
# Europe is the region with the best countries to live in (overall),
# followed by the American continent. According to this, the worst
# region to live in is Sub-Saharan Africa.
# I think it broadly coincides with the perception we all have.

Factor analysis

# Factor analysis with 3 unrotated factors and regression scores;
# the uniquenesses are bounded below by 0.01.
data.f <- factanal(
  data_num,
  factors = 3,
  rotation = "none",
  scores = "regression",
  lower = 0.01
)
data.f
## 
## Call:
## factanal(x = data_num, factors = 3, scores = "regression", rotation = "none",     lower = 0.01)
## 
## Uniquenesses:
##             Fertility        GNI_Capita_PPP       Adult_Mortality 
##                 0.219                 0.309                 0.010 
##      Infant_Mortality       Life_Expentancy      Under5_mortality 
##                 0.010                 0.010                 0.010 
##                   CO2           Cell_phones             Inflation 
##                 0.762                 0.268                 0.908 
##      Business_Freedom        Internet_users             Democracy 
##                 0.195                 0.244                 0.763 
## Judical_Effectiveness       Property_Rights          Unemployment 
##                 0.229                 0.122                 0.924 
##        GDP_capita_PPP    Education_Equality      Economic_Freedom 
##                 0.307                 0.637                 0.260 
## 
## Loadings:
##                       Factor1 Factor2 Factor3
## Fertility             -0.853           0.216 
## GNI_Capita_PPP         0.620   0.523   0.184 
## Adult_Mortality       -0.928   0.108  -0.343 
## Infant_Mortality      -0.972           0.213 
## Life_Expentancy        0.990           0.107 
## Under5_mortality      -0.965           0.239 
## CO2                    0.425   0.223         
## Cell_phones            0.706   0.480         
## Inflation                     -0.293         
## Business_Freedom       0.743   0.503         
## Internet_users         0.647   0.560   0.152 
## Democracy              0.299   0.380         
## Judical_Effectiveness  0.589   0.647         
## Property_Rights        0.633   0.679   0.127 
## Unemployment                          -0.274 
## GDP_capita_PPP         0.634   0.513   0.165 
## Education_Equality     0.505          -0.324 
## Economic_Freedom       0.574   0.630   0.119 
## 
##                Factor1 Factor2 Factor3
## SS loadings      8.307   2.912   0.598
## Proportion Var   0.462   0.162   0.033
## Cumulative Var   0.462   0.623   0.657
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 738.49 on 102 degrees of freedom.
## The p-value is 3.84e-97
# Loadings of the three factors side by side with the uniquenesses.
cbind(data.f$loadings, data.f$uniquenesses)
##                            Factor1     Factor2     Factor3          
## Fertility             -0.853186556 -0.07757631  0.21615584 0.2193291
## GNI_Capita_PPP         0.619970107  0.52268354  0.18375539 0.3086757
## Adult_Mortality       -0.928104127  0.10767357 -0.34276064 0.0100000
## Infant_Mortality      -0.972186499  0.00201423  0.21278436 0.0100000
## Life_Expentancy        0.990251065  0.01323967  0.10662610 0.0100000
## Under5_mortality      -0.965139112  0.05283313  0.23901820 0.0100000
## CO2                    0.425124223  0.22335847  0.08864518 0.7615293
## Cell_phones            0.706239620  0.47952894  0.05550602 0.2682003
## Inflation             -0.009932774 -0.29261929 -0.07776372 0.9082518
## Business_Freedom       0.742999288  0.50250693  0.01002447 0.1953351
## Internet_users         0.647495676  0.55987547  0.15171784 0.2442742
## Democracy              0.298903870  0.38038885  0.04998289 0.7634613
## Judical_Effectiveness  0.588927910  0.64650782  0.07754008 0.2291771
## Property_Rights        0.633346407  0.67904498  0.12693440 0.1216577
## Unemployment           0.030682211  0.02282905 -0.27387600 0.9235206
## GDP_capita_PPP         0.634374042  0.51335112  0.16545675 0.3066681
## Education_Equality     0.505307469  0.05129792 -0.32361272 0.6373353
## Economic_Freedom       0.574093553  0.62953589  0.11928630 0.2598670
# The variance explained by the first three factors is around 66%.
# Stack the three loading plots in one figure. par() returns the previous
# settings, which are restored afterwards so that later plots are not forced
# into the 3x1 layout.
old_par <- par(mfrow = c(3, 1))
# names.arg = FALSE hides the variable labels on the two upper panels
# (spelled out in full instead of the partially matched `names`).
barplot(data.f$loadings[, 1], names.arg = FALSE, las = 2, col = "darkblue", ylim = c(-1, 1))
barplot(data.f$loadings[, 2], names.arg = FALSE, las = 2, col = "darkblue", ylim = c(-1, 1))
barplot(data.f$loadings[, 3], las = 2, col = "darkblue", ylim = c(-1, 1))
par(old_par)

With two factors it looks as follows

# Factor analysis with 2 factors, varimax rotation and Bartlett scores;
# the uniquenesses are bounded below by 0.01.
data.f2 <- factanal(
  data_num,
  factors = 2,
  rotation = "varimax",
  scores = "Bartlett",
  lower = 0.01
)
data.f2
## 
## Call:
## factanal(x = data_num, factors = 2, scores = "Bartlett", rotation = "varimax",     lower = 0.01)
## 
## Uniquenesses:
##             Fertility        GNI_Capita_PPP       Adult_Mortality 
##                 0.225                 0.302                 0.282 
##      Infant_Mortality       Life_Expentancy      Under5_mortality 
##                 0.010                 0.085                 0.010 
##                   CO2           Cell_phones             Inflation 
##                 0.754                 0.262                 0.912 
##      Business_Freedom        Internet_users             Democracy 
##                 0.200                 0.242                 0.778 
## Judical_Effectiveness       Property_Rights          Unemployment 
##                 0.258                 0.135                 0.984 
##        GDP_capita_PPP    Education_Equality      Economic_Freedom 
##                 0.294                 0.686                 0.264 
## 
## Loadings:
##                       Factor1 Factor2
## Fertility             -0.389  -0.789 
## GNI_Capita_PPP         0.784   0.290 
## Adult_Mortality       -0.433  -0.728 
## Infant_Mortality      -0.378  -0.920 
## Life_Expentancy        0.491   0.821 
## Under5_mortality      -0.324  -0.941 
## CO2                    0.424   0.257 
## Cell_phones            0.747   0.425 
## Inflation             -0.271   0.119 
## Business_Freedom       0.762   0.468 
## Internet_users         0.812   0.313 
## Democracy              0.457   0.112 
## Judical_Effectiveness  0.821   0.262 
## Property_Rights        0.889   0.272 
## Unemployment                   0.113 
## GDP_capita_PPP         0.780   0.312 
## Education_Equality     0.171   0.534 
## Economic_Freedom       0.824   0.240 
## 
##                Factor1 Factor2
## SS loadings      6.489   4.829
## Proportion Var   0.361   0.268
## Cumulative Var   0.361   0.629
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 1143.42 on 118 degrees of freedom.
## The p-value is 2e-167
# Loadings of the two factors side by side with the uniquenesses.
cbind(data.f2$loadings, data.f2$uniquenesses)
##                          Factor1    Factor2           
## Fertility             -0.3892152 -0.7893461 0.22544824
## GNI_Capita_PPP         0.7838405  0.2896214 0.30171420
## Adult_Mortality       -0.4333226 -0.7279830 0.28225480
## Infant_Mortality      -0.3784391 -0.9204879 0.01000000
## Life_Expentancy        0.4914202  0.8205697 0.08517211
## Under5_mortality      -0.3240280 -0.9413097 0.01000000
## CO2                    0.4244565  0.2569622 0.75377230
## Cell_phones            0.7465727  0.4253317 0.26172656
## Inflation             -0.2713886  0.1188333 0.91221839
## Business_Freedom       0.7623408  0.4680862 0.19972464
## Internet_users         0.8124896  0.3132018 0.24176451
## Democracy              0.4572141  0.1120195 0.77838616
## Judical_Effectiveness  0.8206817  0.2622013 0.25773601
## Property_Rights        0.8892529  0.2719540 0.13526989
## Unemployment          -0.0572678  0.1133762 0.98375625
## GDP_capita_PPP         0.7803381  0.3116981 0.29391309
## Education_Equality     0.1706461  0.5335385 0.68621291
## Economic_Freedom       0.8237478  0.2397151 0.26397122
# Stack the two loading plots in one figure. par() returns the previous
# settings, which are restored afterwards so that later plots are not forced
# into the 2x1 layout.
old_par <- par(mfrow = c(2, 1))
# names.arg = FALSE hides the variable labels on the upper panel
# (spelled out in full instead of the partially matched `names`).
barplot(data.f2$loadings[, 1], names.arg = FALSE, las = 2, col = "darkblue", ylim = c(-1, 1))
barplot(data.f2$loadings[, 2], las = 2, col = "darkblue", ylim = c(-1, 1))
par(old_par)

# The two factors can be interpreted, from my point of view, as follows:
# Factor1 gives more weight to the economic variables (property rights,
# economic freedom, GDP per capita...), and Factor2 gives more weight to the
# health variables (life expectancy, mortality...).
# The third factor of the 3-factor model is a bit more difficult to describe.

# Distribution of the scores of the countries on each factor:
factor.df1 <- data.frame(Country = data$Country, data.f2$scores) %>%
  gather("factor", "score", -Country)
factor.df1 %>%
  ggplot(aes(x = Country, y = score)) +
  geom_point(size = 1) +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_brewer(palette = "Dark2") +
  facet_wrap(~factor, ncol = 1) +
  labs(title = "2-factor model", x = "", y = "scores", col = "")

CLUSTERING

# The graph on which we are going to plot the countries is the same one
# we drew before with the two principal components (the axes are the PCs;
# the x coordinate is PC1 with its sign flipped).
ggplot(
  data.frame(z1 = -pca$x[, 1], z2 = pca$x[, 2]),
  aes(z1, z2, label = names)
) +
  geom_point(size = 0) +
  labs(title = "PCA", x = "PC1", y = "PC2") +
  theme_bw() +
  theme(legend.position = "bottom") +
  geom_text(size = 2, hjust = 0.6, vjust = 0, check_overlap = TRUE)

# Standardise the data before clustering:
X <- scale(data_num)

How many centers?

# Criterion 1: total within-cluster sum of squares (wss)
X %>% fviz_nbclust(kmeans, method = "wss")

# Here, based on the "elbow method", we could guess that the optimum
# number of centers is 3.


# Criterion 2: average silhouette width
X %>% fviz_nbclust(kmeans, method = "silhouette")

# It tells us that 2 and 3 are the two best numbers of centers.

# Criterion 3: gap statistic (using bootstrap)
X %>% fviz_nbclust(kmeans, method = "gap_stat", k.max = 20)

# According to the gap statistic, we should select 3 centers.

After running the three methods, I believe that the optimum number of centers is 3.

##Kmeans

# k-means with 3 centers and 100 random starts.
# The starts are random, so without a fixed seed the numbering of the
# clusters changes from run to run. Fixing the seed keeps the numbering
# stable across runs (it may differ from the numbering of earlier, unseeded
# runs).
set.seed(123)
fit <- kmeans(X, centers = 3, nstart = 100)
groups <- fit$cluster
groups
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   2   1   1   2   1   1   3   3   1   3   3   1   3   1   3   1   2   1   1   1 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   1   1   3   3   2   2   1   1   2   3   2   2   3   1   1   2   2   2   1   3 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##   1   3   3   3   2   1   1   1   1   1   2   2   3   2   2   1   3   3   1   2 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
##   1   3   2   3   1   2   2   1   1   1   3   3   1   1   1   2   3   3   3   1 
##  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
##   3   1   1   2   1   1   3   3   1   2   3   1   2   2   1   3   3   1   2   2 
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
##   3   1   2   3   2   3   1   1   1   1   1   1   2   2   1   2   3   3   1   2 
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
##   2   3   1   2   1   1   1   1   1   3   3   3   3   1   2   1   1   1   1   1 
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 158 159 160 161 
##   1   1   2   3   3   3   1   2   1   3   1   2   1   3   3   1   1   2   1   2 
## 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   2   1   1   1   1   1   2   1   3   3   3   1   1   1   1   1   2   2   2
# Is the number of countries balanced across the groups?
barplot(table(groups), col = "blue")

# Groups 2 and 3 have a similar size and group 1 is the biggest one. Still, I would say the countries are reasonably spread: it is not the case that one group holds most of the countries while the others are almost empty. The three groups have a reasonable number of countries.

# Value of every (standardised) variable at each cluster center:
centers <- fit$centers
centers
##    Fertility GNI_Capita_PPP Adult_Mortality Infant_Mortality Life_Expentancy
## 1 -0.2720572     -0.3750661      -0.1898215       -0.2619509       0.1770181
## 2  1.3777453     -0.7361939       1.2674339        1.3886070      -1.3708876
## 3 -0.8270098      1.3340912      -0.8644276       -0.8545316       0.9834960
##   Under5_mortality        CO2 Cell_phones   Inflation Business_Freedom
## 1       -0.3183422 -0.2055099  -0.2203137  0.10703621      -0.08075569
## 2        1.4053888 -0.5682594  -0.9786780 -0.05056037      -1.03517861
## 3       -0.7736151  0.8857707   1.2964397 -0.13602580       1.11023865
##   Internet_users   Democracy Judical_Effectiveness Property_Rights Unemployment
## 1     -0.3688983 -0.04221718            -0.2031830      -0.2694080    0.2758798
## 2     -0.7684335 -0.55341482            -0.8147002      -0.8476192   -0.2340903
## 3      1.3537836  0.59190459             1.1131343       1.2575664   -0.2531785
##   GDP_capita_PPP Education_Equality Economic_Freedom
## 1     -0.3326723          0.3481310       -0.2152980
## 2     -0.7796751         -1.0085497       -0.7667696
## 3      1.3022352          0.3500057        1.0889068
# For each cluster: its center as blue bars, with the global center (median
# of every column of X) overlaid as red points.

# Who are the countries in the first group?
i <- 1 # plotting the centers in cluster 1
bar1 <- barplot(centers[i, ], las = 2, col = "darkblue", ylim = c(-2, 2),
                main = paste("Cluster", i, ": Group center in blue, global center in red"))
points(bar1, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# With the centers printed above, this group has intermediate values in
# almost every variable: we could guess that they are developing countries.

# Second group
i <- 2 # plotting the centers in cluster 2
bar2 <- barplot(centers[i, ], las = 2, col = "darkblue", ylim = c(-2, 2),
                main = paste("Cluster", i, ": Group center in blue, global center in red"))
points(bar2, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# With the centers printed above, this group has the highest fertility and
# mortality and the lowest life expectancy and income: third-world countries.


# Third group
i <- 3 # plotting the centers in cluster 3
bar3 <- barplot(centers[i, ], las = 2, col = "darkblue", ylim = c(-2, 2),
                main = paste("Cluster", i, ": Group center in blue, global center in red"))
# Overlay on this plot's own bar positions (bar3; it previously reused bar2).
points(bar3, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# Countries with lower fertility than the mean, higher GDP and life expectancy... It seems these are the richer countries (first-world countries).


# I have noticed that when running the code several times, the number of each group changes. I always get the same clusters, but sometimes the richer countries are in group 3 and sometimes in group 1. I mention this because, if the comments do not match the graphs, it is because the numbering of the groups has changed. Nevertheless, the clusters I get are always the same, so the conclusions are equally valid; the only thing that changes is the number of the group.

Clusplot

# Clusters drawn with a normal ellipse per group and the country names
# written next to the points.
fviz_cluster(fit, data = X, geom = "point", ellipse.type = "norm", pointsize = 1) +
  theme_minimal() +
  geom_text(label = names, hjust = 0, vjust = 0, size = 2, check_overlap = TRUE) +
  scale_fill_brewer(palette = "Paired")

# After looking at the plot, we can confirm what we guessed with the bar charts.
# Broadly, group 3 includes the first-world (highly developed) countries
# such as Australia, Singapore, Netherlands... In the first group
# we can find South American countries (Argentina, Brazil),
# the poorer countries in Europe (Montenegro, Bosnia), and some African
# countries (Algeria, Egypt, Cameroon).
# Finally, in the second group we mainly have the Sub-Saharan countries.

Silhouette plot

# The silhouette value in [-1, 1] measures the similarity (cohesion) of a data point to its cluster relative to other clusters (separation).
# Silhouette plots rely on a distance metric; large values suggest that a data point matches its own cluster well.
# The larger the silhouette widths, the better.

d <- dist(X, method = "euclidean")
sil <- silhouette(groups, d)
# One colour per cluster. The colour vector must have exactly as many entries
# as there are clusters to be applied cluster-wise; the previous `col = 1:5`
# (5 colours for 3 clusters) was recycled over the individual observations.
plot(sil, col = seq_along(unique(groups)), main = "", border = NA)

summary(sil)
## Silhouette of 179 units in 3 clusters from silhouette.default(x = groups, dist = d) :
##  Cluster sizes and average silhouette widths:
##        84        46        49 
## 0.2760713 0.2787378 0.3992127 
## Individual silhouette widths:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.02774  0.20151  0.32765  0.31047  0.42400  0.58512
# Our average silhouette width is 0.31, which is pretty good. 

Profile variables

# From the dataset with all of our data, let's take some variables that we excluded from the clustering and see whether we can draw any other conclusions:
age <- total.data$Population.median.age..years.
# Remember that in the preprocessing we removed the rows with many missing
# values (vec > 6). We have to do the same here so that the vector has the
# appropriate length.
# Guard the negative index: x[-which(cond)] returns an EMPTY vector when no
# element satisfies cond.
drop_rows <- which(vec > 6)
if (length(drop_rows) > 0) {
  age <- age[-drop_rows]
}
summary(age) # although there are some NAs, the graph will simply not plot the missing values
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   15.00   20.00   25.00   26.85   35.00   43.00       3
as.data.frame(X) %>%
  mutate(cluster = factor(groups), names = names, Age = age) %>%
  ggplot(aes(x = cluster, y = Age)) +
  geom_boxplot(fill = "darkblue") +
  labs(title = "Age by cluster", x = "", y = "", col = "")
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

# Conclusion: The median age is higher the more developed the country is.
# This is something generally know, in poorer countries the birth rate
# is higher and life expectancy lower, resulting in lower median ages.

Other interesting graphs

# Two more profile variables taken from the full dataset. As before, drop the
# rows that were removed in the preprocessing (vec > 6). The negative index is
# guarded because x[-which(cond)] returns an EMPTY vector when nothing matches.
urban_population <- total.data$Urban_population_pct_of_total
total_population <- total.data$Population..Millions.
drop_rows <- which(vec > 6)
if (length(drop_rows) > 0) {
  urban_population <- urban_population[-drop_rows]
  total_population <- total_population[-drop_rows]
}

# Total population by cluster (log10 y axis).
as.data.frame(X) %>%
  mutate(cluster = factor(groups), names = names, Population = total_population) %>%
  ggplot(aes(x = cluster, y = Population)) +
  geom_boxplot(fill = "darkblue") +
  scale_y_continuous(trans = "log10") +
  labs(title = "Population (millions, log10 scale) by cluster", x = "", y = "", col = "")

# Population is not relevant to determine the quality of life of a country.
# We may think that bigger economies have more power or influence to
# impose their interests globally and take advantage, but we have seen
# that in fact the size of a country/economy is not important. In fact,
# some countries that have been leading our research are really small (Luxembourg, Switzerland...)

# Urban population (as a percentage of the total) by cluster.
as.data.frame(X) %>%
  mutate(cluster = factor(groups), names = names, Urban_pop = urban_population) %>%
  ggplot(aes(x = cluster, y = Urban_pop)) +
  geom_boxplot(fill = "darkblue") +
  labs(title = "Urban population (% of total) by cluster", x = "", y = "", col = "")
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

# In higher-developped countries people live more in cities, there is more urban population

MAHALANOBIS

# k-means with the Mahalanobis distance.
# B = V diag(sqrt(lambda)) V' is built from the eigen-decomposition of the
# inverse covariance matrix; the centered data are multiplied by B and the
# usual k-means is then run on the transformed data.
S_x <- cov(data_num)
iS <- solve(S_x)
e <- eigen(iS)
V <- e$vectors
B <- V %*% diag(sqrt(e$values)) %*% t(V)
Xtil <- scale(data_num, scale = FALSE) # centered, not rescaled
data_num.S <- Xtil %*% B

fit.mahalanobis <- kmeans(data_num.S, centers = 3, nstart = 100)
groups <- fit.mahalanobis$cluster
centers <- fit.mahalanobis$centers
colnames(centers) <- colnames(X) # label the columns with the variable names
centers
##     Fertility GNI_Capita_PPP Adult_Mortality Infant_Mortality Life_Expentancy
## 1 -0.03367330     -0.3549929     0.011067101      -0.02552415     -0.06443750
## 2  0.14386577      1.5077412    -0.007011833       0.09847801      0.28161306
## 3  0.05069208      0.6817621    -0.681136027       0.21285153     -0.00711553
##   Under5_mortality         CO2 Cell_phones   Inflation Business_Freedom
## 1      -0.04268396  0.01325338  0.03224284 -0.10531093      0.008814855
## 2       0.17825911 -0.06082781 -0.15289425 -0.09580018     -0.037558307
## 3       0.13197002  0.04941535  0.20127063  9.16308997     -0.014957481
##   Internet_users    Democracy Judical_Effectiveness Property_Rights
## 1    -0.03563722 -0.012433847            0.01360478     -0.02515754
## 2     0.16859189  0.054423234           -0.05418373      0.11723333
## 3    -0.21588603 -0.002746367           -0.08551246     -0.12300683
##   Unemployment GDP_capita_PPP Education_Equality Economic_Freedom
## 1   0.02270471     -0.2358220        0.002056248      0.007289119
## 2  -0.10327644      1.0876858       -0.016393959     -0.039286241
## 3   0.06932188     -0.9676286        0.122450446      0.123406376
# For each cluster: its center as blue bars, with the column medians of X
# overlaid as red points.
# NOTE(review): the centers come from the transformed data (data_num.S) while
# the red points are medians of X - confirm this comparison is intended.
i <- 1 # plotting the centers in cluster 1
bar1 <- barplot(
  centers[i, ],
  las = 2, col = "darkblue", ylim = c(-2, 2),
  main = paste("Cluster", i, ": Group center in blue, global center in red")
)
points(bar1, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# It stands out that Democracy, a variable that was not used much until now, is relevant for this group.

i <- 2 # plotting the centers in cluster 2
bar2 <- barplot(
  centers[i, ],
  las = 2, col = "darkblue", ylim = c(-2, 2),
  main = paste("Cluster", i, ": Group center in blue, global center in red")
)
points(bar2, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# Somewhat high infant mortality, low democracy and low economic freedom score.

i <- 3 # plotting the centers in cluster 3
bar3 <- barplot(
  centers[i, ],
  las = 2, col = "darkblue", ylim = c(-2, 2),
  main = paste("Cluster", i, ": Group center in blue, global center in red")
)
points(bar3, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# This third group is very strange, especially because inflation is not a variable that is high in many countries.

# Let's check how many countries there are in group 3
barplot(table(groups), col = "blue")

# There is only 1 country.

# Cluspot
fviz_cluster(fit.mahalanobis, data = X, geom = c("point"),ellipse.type = 'norm', pointsize=1)+
  theme_minimal()+geom_text(label=names,hjust=0, vjust=0,size=2,check_overlap = T)+scale_fill_brewer(palette="Paired")
## Too few points to calculate an ellipse

# The only country in group 3 is Venezuela (a country with high inflation, as we saw).
# We should reduce the number of clusters: with three clusters, one group contains just one country.
# Mahalanobis k-means with 2 centers.
# The whitened matrix data_num.S built for the 3-center fit depends only on
# data_num, so it is reused here instead of repeating the covariance
# inversion, eigen-decomposition and centering a second time.
fit.mahalanobis <- kmeans(data_num.S, centers = 2, nstart = 100)
groups <- fit.mahalanobis$cluster
centers <- fit.mahalanobis$centers
# Label the center coordinates with the variable names of X
colnames(centers) <- colnames(X)
centers
##       Fertility GNI_Capita_PPP Adult_Mortality Infant_Mortality Life_Expentancy
## 1  0.0506920828    0.681762058    -0.681136027      0.212851529   -7.115530e-03
## 2 -0.0005727919   -0.007703526     0.007696452     -0.002405102    8.040147e-05
##   Under5_mortality           CO2  Cell_phones  Inflation Business_Freedom
## 1      0.131970016  0.0494153525  0.201270630  9.1630900    -0.0149574814
## 2     -0.001491187 -0.0005583656 -0.002274244 -0.1035377     0.0001690111
##   Internet_users     Democracy Judical_Effectiveness Property_Rights
## 1    -0.21588603 -2.746367e-03         -0.0855124571    -0.123006834
## 2     0.00243939  3.103239e-05          0.0009662425     0.001389908
##    Unemployment GDP_capita_PPP Education_Equality Economic_Freedom
## 1  0.0693218809    -0.96762859        0.122450446      0.123406376
## 2 -0.0007832981     0.01093366       -0.001383621     -0.001394422
i <- 1  # plot the center of cluster 1
bar1 <- barplot(
  centers[i, ],
  las = 2,
  col = "darkblue",
  ylim = c(-2, 2),
  main = paste("Cluster", i, ": Group center in blue, global center in red")
)
# Red dots: per-variable median of X
points(bar1, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# Democracy and judicial effectiveness are relevant


i <- 2  # plot the center of cluster 2
bar2 <- barplot(
  centers[i, ],
  las = 2,
  col = "darkblue",
  ylim = c(-2, 2),
  main = paste("Cluster", i, ": Group center in blue, global center in red")
)
points(bar2, y = apply(X, 2, quantile, 0.50), col = "red", pch = 19)

# Low democracy score

# Number of countries per cluster
barplot(table(groups), col = "blue")

# Clusplot
fviz_cluster(
  fit.mahalanobis,
  data = X,
  geom = "point",
  ellipse.type = "norm",
  pointsize = 1
) +
  theme_minimal() +
  geom_text(label = names, hjust = 0, vjust = 0, size = 2, check_overlap = TRUE) +
  scale_fill_brewer(palette = "Paired")
## Too few points to calculate an ellipse

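# This classification was expected to divide the countries in 2: the wealthier ones with a higher living standard, and the poorer ones. However, the center of cluster 1 is identical to the center of the one-country (Venezuela) cluster of the 3-center solution, and fviz_cluster again reports too few points to draw an ellipse, so this split appears to isolate that single high-inflation country rather than separate rich from poor.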

How similar are the clusters?

adjustedRandIndex(fit$cluster, fit.mahalanobis$cluster) 
## [1] -0.007383256
# An adjusted Rand index close to 1 indicates strong agreement between two
# partitions; a value around 0 means agreement no better than chance.
# We obtain about -0.007, so the clusters change completely depending on the
# method we use.

PAM

# How many groups? Compare three criteria on the standardized data.
fviz_nbclust(scale(X), FUNcluster = pam, method = "silhouette", k.max = 10)

fviz_nbclust(scale(X), FUNcluster = pam, method = "gap_stat", k.max = 10, nboot = 500)

fviz_nbclust(scale(X), FUNcluster = pam, method = "wss", k.max = 10, nboot = 500)

# Let's select 3 centers

# Fit PAM with 3 medoids (standardizing the data) and visualize the clusters
fit.pam <- eclust(X, FUNcluster = "pam", k = 3, stand = TRUE, graph = FALSE)

fviz_cluster(fit.pam, data = X, geom = "point", pointsize = 1) +
  theme_minimal() +
  geom_text(label = names, hjust = 0, vjust = 0, size = 2, check_overlap = FALSE) +
  scale_fill_brewer(palette = "Paired")

# Medoids of the PAM solution, one row per cluster
centers2 <- fit.pam$medoids

# Medoid of cluster 1
barplot(centers2[1, ], las = 2, col = "darkblue", ylim = c(-2, 2))

# High fertility and mortality
# Medoid of cluster 2
barplot(centers2[2, ], las = 2, col = "darkblue", ylim = c(-2, 2))

# Medoid of cluster 3
barplot(centers2[3, ], las = 2, col = "darkblue", ylim = c(-2, 2))

# High GDP, freedom and life expectancy (this is the group of the best countries)


adjustedRandIndex(fit$cluster, fit.pam$clustering) 
## [1] 0.8150562
# k-means and PAM give very similar partitions in this case.

# Table to map: one row per country, with its PAM cluster stored as a factor
map <- data.frame(country = names, value = as.factor(fit.pam$clustering))
#map = data.frame(country=names, value=fit.kmeans$cluster)

# Convert the country names into ISO3 codes using the function countrycode()
map$country <- countrycode(map$country, "country.name", "iso3c")
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Micronesia
# Create the data object supporting the map
matched <- joinCountryData2Map(map, joinCode = "ISO3", nameJoinColumn = "country")
## 178 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
## 65 codes from the map weren't represented in your data
# Draw the map. The plotted column is a factor, so the categorical
# classification is requested explicitly; with catMethod = "pretty" rworldmap
# switched to 'categorical' by itself and emitted a message saying so.
mapCountryData(
  matched,
  nameColumnToPlot = "value",
  missingCountryCol = "white",
  borderCol = "#C7D9FF",
  catMethod = "categorical",
  colourPalette = "rainbow",
  mapTitle = "Clusters",
  lwd = 1
)

# Now we can see which country belong to each group but in the map, that is more visual.

KERNEL KMEANS

# Kernel k-means with 3 centers and a radial basis (Gaussian) kernel
fit.ker <- kkmeans(
  as.matrix(X),
  centers = 3,
  kernel = "rbfdot"
)
## Using automatic sigma estimation (sigest) for RBF or laplace kernel
# By default, the Gaussian kernel is used
# By default, the sigma parameter is estimated

# Cluster centers
centers(fit.ker)
##            [,1]      [,2]       [,3]       [,4]       [,5]       [,6]
## [1,] -0.8443745  1.285569 -0.8385352 -0.8482764  0.9733174 -0.7685035
## [2,]  1.0213562 -0.573755  0.9616292  1.0658178 -1.0583195  1.0516781
## [3,] -0.3573890 -0.351814 -0.3068331 -0.3953265  0.3033075 -0.4367544
##            [,7]       [,8]       [,9]       [,10]      [,11]       [,12]
## [1,]  0.5595351  1.3005775 -0.1352464  1.14359718  1.4080298  0.74972753
## [2,] -0.2163829 -0.7679019  0.2273149 -0.93123076 -0.6784393 -0.49933477
## [3,] -0.1835656 -0.1847728 -0.1154241  0.07128218 -0.3396482 -0.05476961
##           [,13]      [,14]      [,15]      [,16]      [,17]       [,18]
## [1,]  1.2032798  1.3116217 -0.2270884  1.2122482  0.3503976  1.11800710
## [2,] -0.7728403 -0.8044215 -0.1568437 -0.5974878 -0.6144103 -0.75322875
## [3,] -0.1139886 -0.1589517  0.2978885 -0.2802020  0.3223067 -0.07381047
# Number of countries in each cluster
size(fit.ker)
## [1] 47 63 69
# Within-cluster sum of squares
withinss(fit.ker)
## [1] 1434.8907 2155.9962  493.2292
# fviz_cluster accepts a list with the data and the cluster assignment
object.ker <- list(data = X, cluster = fit.ker@.Data)
fviz_cluster(object.ker, geom = "point", ellipse = FALSE, pointsize = 2) +
  theme_minimal() +
  geom_text(label = names, hjust = 0, vjust = 0, size = 3, check_overlap = TRUE) +
  scale_fill_brewer(palette = "Paired")

# We get similar clusters to the ones that we have obtained with previous methods.

HIERARCHICAL CLUSTERING

# Hierarchical clustering on the standardized variables.
# (Documentation: help("dist") and help("hclust") in package stats. The
# interactive `?` lookups were removed so that running the script no longer
# starts the help server as a side effect.)
d <- dist(scale(X), method = "euclidean")
hc <- hclust(d, method = "ward.D2") # Ward's minimum variance method

# Visualization with a dendrogram
# Classical dendrogram:
hc$labels <- names

fviz_dend(
  x = hc,
  k = 3,
  palette = "jco",
  rect = TRUE, rect_fill = TRUE,
  rect_border = "jco"
)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# It is difficult to read the country labels in the classical dendrogram
# Let's use a phylogenic tree instead:
fviz_dend(
  x = hc,
  k = 3,
  color_labels_by_k = TRUE,
  cex = 0.8,
  type = "phylogenic",
  repel = TRUE
) +
  labs(title = "Socio-economic-health tree clustering of the world") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Much better to visualize it

In a map

# Cut the dendrogram into 3 groups
groups.hc <- cutree(hc, k = 3)

# Show the hierarchical clusters on a world map:
# one row per country, with its cluster stored as a factor
map <- data.frame(country = names, value = as.factor(groups.hc))
# Convert the country names into ISO3 codes using the function countrycode()
map$country <- countrycode(map$country, "country.name", "iso3c")
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Micronesia
# Create the data object supporting the map
matched <- joinCountryData2Map(map, joinCode = "ISO3",
                               nameJoinColumn = "country")
## 178 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
## 65 codes from the map weren't represented in your data
# Draw the map. The plotted column is a factor, so the categorical
# classification is requested explicitly; with catMethod = "pretty" rworldmap
# switched to 'categorical' by itself and emitted a message saying so.
mapCountryData(matched, nameColumnToPlot = "value", missingCountryCol = "white",
               borderCol = "#C7D9FF",
               catMethod = "categorical", colourPalette = "rainbow",
               mapTitle = "Clusters", lwd = 1)

# Very similar to the one we obtained before,although there are slight 
# differences (Uruguay, for instance, is now in the group of high-developed countries)

EM CLUSTERING

# Gaussian mixture model fitted by EM (X is already scaled)
res.Mclust <- Mclust(X)
# Selected model, number of components and cluster sizes
summary(res.Mclust)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVI (diagonal, varying volume and shape) model with 6 components: 
## 
##  log-likelihood   n  df       BIC       ICL
##       -1288.257 179 221 -3722.926 -3724.428
## 
## Clustering table:
##  1  2  3  4  5  6 
## 47 25 47 27 20 13
# The clustering is probabilistic: for each country we do not get a single
# group but the probability that the country belongs to each of the groups

head(res.Mclust[["z"]])
##            [,1]         [,2]          [,3]          [,4]         [,5]
## 1  1.000000e+00 0.000000e+00 1.582198e-104  0.000000e+00 1.979405e-13
## 2  1.314821e-46 2.168022e-01  7.831977e-01  7.679911e-95 2.132436e-08
## 3  1.382921e-71 7.364054e-23  9.998201e-01  0.000000e+00 1.798544e-04
## 4  1.000000e+00 0.000000e+00 4.847608e-116  0.000000e+00 2.372697e-08
## 5 2.586627e-154 1.369190e-55  3.355358e-83  0.000000e+00 9.859757e-01
## 6  2.270362e-27 1.293017e-11  9.999996e-01 3.553950e-236 4.041192e-07
##            [,6]
## 1 1.645435e-147
## 2  1.105216e-10
## 3  4.999184e-11
## 4 3.321889e-136
## 5  1.402431e-02
## 6  9.150435e-21
# probability of each country belonging to each cluster

# Of course, the tool assigns the group with the highest probability
head(res.Mclust[["classification"]])
## 1 2 3 4 5 6 
## 1 3 3 1 5 3
# each country is assigned to a cluster according to those probabilities

# BIC of every fitted model against the number of components.
# The argument was misspelled `pallete`, so it was swallowed by `...` and the
# "jco" palette was never applied; the correct name is `palette`.
fviz_mclust(object = res.Mclust, what = "BIC", palette = "jco")
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.

# Judging by the graph, 4 groups already looks reasonable
# (note that Mclust itself selected 6 components above)

Clusplot

# Cluster plot of the Mclust classification. `palette` was misspelled
# `pallete`, so the "jco" palette was silently ignored.
fviz_mclust(object = res.Mclust, what = "classification", geom = "point", palette = "jco")

# How similar are the clusters?
# Remember: the closer to 1, the more agreement
adjustedRandIndex(res.Mclust[["classification"]], fit.pam[["clustering"]])
## [1] 0.4832715
adjustedRandIndex(res.Mclust[["classification"]], groups.hc)
## [1] 0.3671007
# About 0.48 and 0.37 respectively: we can conclude that the partitions
# are somewhat related.

# Visualization in the map
groups.mclust <- res.Mclust$classification

# Show the Mclust clusters on a world map.
# The cluster label is an identifier, not a quantity, so it is stored as a
# factor and mapped categorically. Mapping it as a number with
# catMethod = "pretty" binned the 6 clusters into only 5 classes.
map <- data.frame(country = names, value = as.factor(groups.mclust))
# Convert the country names into ISO3 codes using the function countrycode()
map$country <- countrycode(map$country, "country.name", "iso3c")
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Micronesia
# Create the data object supporting the map
matched <- joinCountryData2Map(map, joinCode = "ISO3",
                               nameJoinColumn = "country")
## 178 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
## 65 codes from the map weren't represented in your data
# Draw the map, one colour per cluster
mapCountryData(matched, nameColumnToPlot = "value", missingCountryCol = "white",
               borderCol = "#C7D9FF",
               catMethod = "categorical", colourPalette = "topo",
               mapTitle = "Clusters", lwd = 1)

# Now we have more groups, but we can observe that in all the clusters we have made, broadly, North America, Europe, Australia, Japan are together and are selected as the best countries to live.

Heatmaps

# A heat map is a false-color image of the standardized data frame X, with a
# dendrogram added to the left side (rows) and to the top (columns).
# Rows and columns are ordered by Ward clustering on Euclidean distances.
heatmap(
  scale(X),
  scale = "none",
  distfun = function(m) dist(m, method = "euclidean"),
  hclustfun = function(dd) hclust(dd, method = "ward.D2"),
  cexRow = 0.7
)

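# The colors encode the standardized value of each variable for each country
# (not a correlation); with the default palette of recent R versions, the
# darker the cell, the higher the value.
# The dendrograms group similar rows and similar columns: the height at which
# two branches merge reflects how dissimilar they are.
# For instance, observation 89 stands out mainly in CO2.
# Also, observation 8 (Austria) stands out mainly in the
# 'more important' variables (the ones that explained a bigger part
# of our dataset: cell phones, life expectancy, economic freedom, internet users...)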

SOURCES

All of my code is inspired by what we have seen in class; that is the main reference I have used. Apart from that, I found some interesting ways to build the graphs at https://r-graph-gallery.com/ggplot2-package.html. Also, to resolve the doubts I had while writing the code, I usually looked for answers on https://stackoverflow.com/ and https://rpubs.com/.